修复初始步骤损失计算逻辑

This commit is contained in:
songsenand 2026-02-20 23:30:35 +08:00
parent 4560a9ed06
commit 17324ffa10
1 changed file with 2 additions and 0 deletions

View File

@@ -579,6 +579,8 @@ class MoEModel(nn.Module):
):
avg_loss = batch_loss_sum / eval_frequency
acc, eval_loss = self.model_eval(eval_dataloader, criterion)
if global_step == 0:
avg_loss = eval_loss
super().train()
if monitor is not None:
monitor.add_step(