Skip to content

LightGBM model giving different results in different environments #7102

@sumeetlohar7

Description

@sumeetlohar7

Description

I am training a LightGBM model on the same training data in a dev environment and a staging environment on Vertex AI. I am generating a forecast for 13 weeks at a weekly level, but the forecasts from the two environments differ by up to 40% in some weeks.

Reproducible example

def train_size_model(df, feature_cols, date_col, test_horizon, train_till, category_cols, TUNING_SETTINGS, LGBM_SEARCH_SPACE, params):
    """
    Train a LightGBM regressor on rows where itm_ord_qty > 0 (conditional size model).

    The last `test_horizon` weeks before `train_till` are held out for validation;
    Optuna (TPE + median pruner) tunes hyper-parameters against validation MAE,
    then a final model is refit with the best parameters.

    Parameters
    ----------
    df : pandas.DataFrame with at least `date_col`, `feature_cols`, "itm_ord_qty".
    feature_cols : list of feature column names.
    date_col : name of the datetime column.
    test_horizon : validation window length in weeks.
    train_till : upper date bound (inclusive) for all data used.
    category_cols : columns to cast to pandas "category" dtype.
    TUNING_SETTINGS : dict with "n_trials" and optional "random_state"/"n_jobs".
    LGBM_SEARCH_SPACE : dict of param-name -> spec consumed by _sample_from_spec.
    params : base LightGBM parameters merged under the tuned ones.

    Returns
    -------
    (model, val_df_with_pred_size, metrics_dict, best_params)

    Raises
    ------
    ValueError : if either the train or validation split has no positive rows.
    """
    # The only constructor kwargs we forward to LGBMRegressor; everything else
    # in `params` / the trial output is dropped so unknown keys can't break it.
    ALLOWED_KEYS = {
        "boosting_type", "objective", "metric", "learning_rate",
        "num_leaves", "n_estimators", "max_depth", "min_data_in_leaf",
        "feature_fraction", "bagging_fraction", "bagging_freq",
        "lambda_l1", "lambda_l2", "random_state", "n_jobs",
    }

    def _clean_params(raw):
        """Filter to valid LightGBM kwargs and force reproducible settings."""
        cleaned = {k: v for k, v in raw.items() if k in ALLOWED_KEYS}
        cleaned.setdefault("verbosity", -1)
        # Set the seed/determinism INSIDE the dict. The previous code passed
        # random_state both via **merged (it is in ALLOWED_KEYS) and as an
        # explicit keyword, which raises "got multiple values for keyword
        # argument 'random_state'" whenever the caller's `params` contains it.
        cleaned["random_state"] = 42
        cleaned["deterministic"] = True
        return cleaned

    df = df.copy()
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
    df = df[df[date_col] <= train_till]

    cutoff_date = pd.to_datetime(train_till) - pd.Timedelta(weeks=test_horizon)

    for col in category_cols:
        df[col] = df[col].astype("category")

    train_df = df[df[date_col] <= cutoff_date]
    val_df = df[df[date_col] > cutoff_date]

    # Conditional model: only learn the size of non-zero orders.
    train_pos = train_df[train_df["itm_ord_qty"] > 0]
    val_pos = val_df[val_df["itm_ord_qty"] > 0]

    if train_pos.empty or val_pos.empty:
        raise ValueError("Not enough positive rows to train/evaluate regressor. Need >0 positive rows in both train and val.")

    X_train = train_pos[feature_cols]
    y_train = train_pos["itm_ord_qty"]
    X_val = val_pos[feature_cols]
    y_val = val_pos["itm_ord_qty"]

    # --- Optuna param builder ---
    def build_trial_params(trial):
        # Sample every hyper-parameter declared in the search space.
        return {pname: _sample_from_spec(trial, pname, spec)
                for pname, spec in LGBM_SEARCH_SPACE.items()}

    # --- Optuna objective (LightGBM) ---
    def objective_lgbm(trial):
        merged = dict(params)
        merged.update(build_trial_params(trial))
        merged = _clean_params(merged)

        model = LGBMRegressor(**merged)
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            eval_metric="l1",
            callbacks=[lgb.early_stopping(50), lgb.log_evaluation(period=0)],
            categorical_feature=category_cols,
        )
        return mean_absolute_error(y_val, model.predict(X_val))

    # --- Optuna study ---
    sampler = optuna.samplers.TPESampler(seed=TUNING_SETTINGS.get("random_state", 42))
    pruner = MedianPruner(n_startup_trials=5, n_warmup_steps=5)
    study = optuna.create_study(direction="minimize", sampler=sampler, pruner=pruner)
    # NOTE(review): n_jobs > 1 makes the order in which trials complete (and
    # therefore the TPE suggestions and best_params) non-deterministic — a
    # classic cause of different results across environments. Keep it at 1
    # when run-to-run reproducibility matters.
    study.optimize(
        objective_lgbm,
        n_trials=TUNING_SETTINGS["n_trials"],
        n_jobs=TUNING_SETTINGS.get("n_jobs", 1),
        show_progress_bar=True,
    )

    best_params = study.best_params

    # --- Final params (clean + silent) ---
    final_params = dict(params)
    final_params.update(best_params)
    final_params = _clean_params(final_params)
    final_params.setdefault("n_estimators", 1000)
    final_params.setdefault("learning_rate", 0.05)
    final_params.setdefault("n_jobs", TUNING_SETTINGS.get("n_jobs", 1))

    # --- Final model: refit on the same split with the tuned parameters ---
    model = LGBMRegressor(**final_params)
    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        eval_metric="l1",
        callbacks=[lgb.early_stopping(50), lgb.log_evaluation(period=0)],
        categorical_feature=category_cols,
    )

    # Persist the category-code mapping on the model so inference can encode
    # categoricals identically. Best-effort: fall back to X_train on failure.
    try:
        _save_cat_mapping_on_model(model, train_pos, category_cols)
    except Exception:
        _save_cat_mapping_on_model(model, X_train, category_cols)

    # --- Predictions & metrics ---
    pred_val = model.predict(X_val)
    val_pos_copy = val_pos.copy()
    val_pos_copy["pred_size"] = pred_val

    mae = mean_absolute_error(y_val, pred_val)
    # Epsilon guards against division by zero (rows are >0 already, but cheap).
    mape = (np.abs(y_val - pred_val) / (y_val.abs() + 1e-9)).mean()

    metrics = {
        "mae_conditional": mae,
        "mape_conditional": mape,
        "train_pos_rows": len(train_pos),
        "val_pos_rows": len(val_pos),
    }

    return model, val_pos_copy, metrics, best_params

Environment info

LightGBM version or commit hash: lightgbm==4.5.0

Command(s) you used to install LightGBM
pip install -r requirements.txt

Running on dev and stg env on VertexAI
Other libraries used:
pandas==2.2.2
numpy==1.26.4
prophet==1.1.5
scikit-learn==1.5.1
hyperopt==0.2.7
plotly==5.24.1
google-cloud-bigquery>=3.31.0
google-cloud-bigquery-storage>=2.25.0
db-dtypes>=1.4.3
statsmodels==0.14.4
tqdm>=4.67.0
scipy==1.13.1
lightgbm==4.5.0
optuna==4.1.0
pyarrow>=15.0.2
catboost
Installed through requirements

Additional Comments

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions