From 4560a9ed06bc94365864ae7b9e439fc4292db0b4 Mon Sep 17 00:00:00 2001 From: songsenand Date: Fri, 20 Feb 2026 23:28:35 +0800 Subject: [PATCH] =?UTF-8?q?=E7=A7=BB=E9=99=A4=20global=5Fstep=20=E8=87=AA?= =?UTF-8?q?=E5=A2=9E=E9=80=BB=E8=BE=91=E5=B9=B6=E8=B0=83=E6=95=B4=E8=87=B3?= =?UTF-8?q?=E5=BE=AA=E7=8E=AF=E6=9C=AB=E5=B0=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/trainer/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trainer/model.py b/src/trainer/model.py index 54ed892..bfdc1ff 100644 --- a/src/trainer/model.py +++ b/src/trainer/model.py @@ -570,7 +570,6 @@ class MoEModel(nn.Module): scaler.step(optimizer) scaler.update() optimizer.zero_grad() - global_step += 1 original_loss = loss.item() * grad_accum_steps batch_loss_sum += original_loss # 周期性评估(与原代码相同) @@ -592,6 +591,7 @@ class MoEModel(nn.Module): batch_loss_sum = 0.0 if processed_batches >= stop_batch: break + global_step += 1 def load_from_state_dict(self, state_dict_path: Union[str, Path]): state_dict = torch.load(