From 17324ffa10338c3e1b1c9a8f29e340ae11fdf2b5 Mon Sep 17 00:00:00 2001 From: songsenand Date: Fri, 20 Feb 2026 23:30:35 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=88=9D=E5=A7=8B=E6=AD=A5?= =?UTF-8?q?=E9=AA=A4=E6=8D=9F=E5=A4=B1=E8=AE=A1=E7=AE=97=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/trainer/model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/trainer/model.py b/src/trainer/model.py index bfdc1ff..362e6cc 100644 --- a/src/trainer/model.py +++ b/src/trainer/model.py @@ -579,6 +579,8 @@ class MoEModel(nn.Module): ): avg_loss = batch_loss_sum / eval_frequency acc, eval_loss = self.model_eval(eval_dataloader, criterion) + if global_step == 0: + avg_loss = eval_loss super().train() if monitor is not None: monitor.add_step(