diff --git a/src/trainer/model.py b/src/trainer/model.py index 933d45c..0132f02 100644 --- a/src/trainer/model.py +++ b/src/trainer/model.py @@ -455,7 +455,7 @@ class MoEModel(nn.Module): scaler = amp.GradScaler(enabled=mixed_precision) - total_steps = max(stop_batch, 2e5) + total_steps = stop_batch warmup_steps = int(total_steps * warmup_ratio) logger.info(f"Training Start: Steps={total_steps}, Warmup={warmup_steps}") processed_batches = 0