From 43c8349d51ae67494866be8927be869a0a32d41e Mon Sep 17 00:00:00 2001 From: songsenand Date: Thu, 26 Feb 2026 14:36:25 +0800 Subject: [PATCH] =?UTF-8?q?fix(trainer):=20=E7=A7=BB=E9=99=A4=E5=9B=BA?= =?UTF-8?q?=E5=AE=9A=E6=80=BB=E6=AD=A5=E6=95=B0=EF=BC=8C=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E5=AE=9E=E9=99=85=E5=81=9C=E6=AD=A2=E6=89=B9=E6=AC=A1=E8=AE=A1?= =?UTF-8?q?=E7=AE=97=20warmup=20=E6=AD=A5=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/trainer/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trainer/model.py b/src/trainer/model.py index 933d45c..0132f02 100644 --- a/src/trainer/model.py +++ b/src/trainer/model.py @@ -455,7 +455,7 @@ class MoEModel(nn.Module): scaler = amp.GradScaler(enabled=mixed_precision) - total_steps = max(stop_batch, 2e5) + total_steps = stop_batch warmup_steps = int(total_steps * warmup_ratio) logger.info(f"Training Start: Steps={total_steps}, Warmup={warmup_steps}") processed_batches = 0