Description
I am training a LightGBM model on the same training data in a dev environment and a staging environment on Vertex AI. I generate a forecast for 13 weeks at a weekly level, but the forecasts from the two environments differ by up to 40% in some weeks.
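For reference, this is my understanding of the determinism-related settings LightGBM exposes (a minimal sketch with illustrative values, not my full configuration; the docs recommend force_row_wise=True or force_col_wise=True alongside deterministic=True, and note that results can still differ across different systems or compilers):

from lightgbm import LGBMRegressor

# Sketch: pin every source of randomness LightGBM controls directly.
# deterministic=True stabilizes results for the same data/params,
# force_row_wise=True avoids numerical instability in that mode, and
# n_jobs=1 removes thread-count-dependent floating point summation order.
model = LGBMRegressor(
    deterministic=True,
    force_row_wise=True,
    random_state=42,
    n_jobs=1,
)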
Reproducible example
import lightgbm as lgb
import numpy as np
import optuna
import pandas as pd
from lightgbm import LGBMRegressor
from optuna.pruners import MedianPruner
from sklearn.metrics import mean_absolute_error


def train_size_model(df, feature_cols, date_col, test_horizon, train_till,
                     category_cols, TUNING_SETTINGS, LGBM_SEARCH_SPACE, params):
    """
    Train regressor on rows where itm_ord_qty > 0 (conditional size model) using LightGBM.
    Returns: model, val_df (with pred_size), metrics dict (MAE on positive rows), best_params
    """
    df = df.copy()
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
    df = df[df[date_col] <= train_till]
    cutoff_date = pd.to_datetime(train_till) - pd.Timedelta(weeks=test_horizon)
    for col in category_cols:
        df[col] = df[col].astype("category")
    train_df = df[df[date_col] <= cutoff_date]
    val_df = df[df[date_col] > cutoff_date]
    train_pos = train_df[train_df["itm_ord_qty"] > 0]
    val_pos = val_df[val_df["itm_ord_qty"] > 0]
    if train_pos.empty or val_pos.empty:
        raise ValueError("Not enough positive rows to train/evaluate regressor. "
                         "Need >0 positive rows in both train and val.")
    X_train = train_pos[feature_cols]
    y_train = train_pos["itm_ord_qty"]
    X_val = val_pos[feature_cols]
    y_val = val_pos["itm_ord_qty"]

    # Keep only valid LightGBM parameters (shared by tuning and the final fit).
    allowed_keys = {
        "boosting_type", "objective", "metric", "learning_rate",
        "num_leaves", "n_estimators", "max_depth", "min_data_in_leaf",
        "feature_fraction", "bagging_fraction", "bagging_freq",
        "lambda_l1", "lambda_l2", "random_state", "n_jobs",
    }

    # --- Optuna param builder ---
    def build_trial_params(trial):
        trial_params = {}
        for pname, spec in LGBM_SEARCH_SPACE.items():
            # _sample_from_spec is a project helper (defined elsewhere, not shown)
            trial_params[pname] = _sample_from_spec(trial, pname, spec)
        return trial_params

    # --- Optuna objective (LightGBM) ---
    def objective_lgbm(trial):
        merged = dict(params)
        merged.update(build_trial_params(trial))
        merged = {k: v for k, v in merged.items() if k in allowed_keys}
        merged.setdefault("verbosity", -1)
        # Set the seed on the dict instead of passing random_state twice:
        # LGBMRegressor(**merged, random_state=42) raises a TypeError whenever
        # "random_state" survives the allowed_keys filter above.
        merged["random_state"] = 42
        # Docs recommend force_row_wise=True (or force_col_wise=True)
        # alongside deterministic=True to avoid numerical instability.
        merged["deterministic"] = True
        stopping_rounds = 50
        model = LGBMRegressor(**merged)
        callbacks = [
            lgb.early_stopping(stopping_rounds),
            lgb.log_evaluation(period=0),
        ]
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            eval_metric="l1",
            callbacks=callbacks,
            categorical_feature=category_cols,
        )
        pred = model.predict(X_val)
        return mean_absolute_error(y_val, pred)

    # --- Optuna study ---
    sampler = optuna.samplers.TPESampler(seed=TUNING_SETTINGS.get("random_state", 42))
    # Note: the objective never reports intermediate values, so this pruner
    # is configured but no trial is actually pruned.
    pruner = MedianPruner(n_startup_trials=5, n_warmup_steps=5)
    study = optuna.create_study(direction="minimize", sampler=sampler, pruner=pruner)
    # With n_jobs > 1, the seeded TPESampler is no longer reproducible because
    # trials finish in a nondeterministic order.
    study.optimize(
        objective_lgbm,
        n_trials=TUNING_SETTINGS["n_trials"],
        n_jobs=TUNING_SETTINGS.get("n_jobs", 1),
        show_progress_bar=True,
    )
    best_params = study.best_params

    # --- Final params (clean + silent) ---
    final_params = dict(params)
    final_params.update(best_params)
    final_params = {k: v for k, v in final_params.items() if k in allowed_keys}
    final_params.setdefault("verbosity", -1)
    final_params.setdefault("n_estimators", 1000)
    final_params.setdefault("learning_rate", 0.05)
    final_params.setdefault("n_jobs", TUNING_SETTINGS.get("n_jobs", 1))
    final_params["random_state"] = 42
    final_params["deterministic"] = True

    # --- Final model ---
    model = LGBMRegressor(**final_params)
    callbacks = [
        lgb.early_stopping(50),
        lgb.log_evaluation(period=0),
    ]
    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        eval_metric="l1",
        callbacks=callbacks,
        categorical_feature=category_cols,
    )

    # Persist the category mapping from the training dataframe used to build X_train.
    # _save_cat_mapping_on_model is a project helper (defined elsewhere, not shown).
    try:
        _save_cat_mapping_on_model(model, train_pos, category_cols)
    except Exception:
        # fallback: save from X_train instead
        _save_cat_mapping_on_model(model, X_train, category_cols)

    # --- Predictions & metrics ---
    pred_val = model.predict(X_val)
    val_pos_copy = val_pos.copy()
    val_pos_copy["pred_size"] = pred_val
    mae = mean_absolute_error(y_val, pred_val)
    mape = (np.abs(y_val - pred_val) / (y_val.abs() + 1e-9)).mean()
    metrics = {
        "mae_conditional": mae,
        "mape_conditional": mape,
        "train_pos_rows": len(train_pos),
        "val_pos_rows": len(val_pos),
    }
    return model, val_pos_copy, metrics, best_params
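To separate pipeline-level nondeterminism from the dev-vs-staging difference, a quick check I can run in a single environment (a sketch; argument names match the function above):

import numpy as np

# Sketch: if two back-to-back runs in ONE environment already disagree,
# the nondeterminism is in the pipeline (e.g. Optuna with n_jobs > 1),
# not in anything specific to dev vs. staging.
run1 = train_size_model(df, feature_cols, date_col, test_horizon, train_till,
                        category_cols, TUNING_SETTINGS, LGBM_SEARCH_SPACE, params)
run2 = train_size_model(df, feature_cols, date_col, test_horizon, train_till,
                        category_cols, TUNING_SETTINGS, LGBM_SEARCH_SPACE, params)
np.testing.assert_allclose(run1[1]["pred_size"].to_numpy(),
                           run2[1]["pred_size"].to_numpy())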
Environment info
LightGBM version or commit hash: lightgbm==4.5.0
Command(s) you used to install LightGBM
pip install -r requirements.txt
Running in dev and staging environments on Vertex AI
Other libraries used:
pandas==2.2.2
numpy==1.26.4
prophet==1.1.5
scikit-learn==1.5.1
hyperopt==0.2.7
plotly==5.24.1
google-cloud-bigquery>=3.31.0
google-cloud-bigquery-storage>=2.25.0
db-dtypes>=1.4.3
statsmodels==0.14.4
tqdm>=4.67.0
scipy==1.13.1
lightgbm==4.5.0
optuna==4.1.0
pyarrow>=15.0.2
catboost
All installed through the requirements.txt above.
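Since a few entries are unpinned (e.g. catboost, and everything specified with >=), the resolved versions can differ between the two environments. A sketch I use to dump exact versions in each environment so the outputs can be diffed:

from importlib import metadata

# Sketch: print every installed distribution as name==version; run in both
# dev and staging, then diff the two outputs to spot version drift.
for dist in sorted(metadata.distributions(),
                   key=lambda d: (d.metadata["Name"] or "").lower()):
    print(f"{dist.metadata['Name']}=={dist.version}")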