diff --git a/src/trainer/model.py b/src/trainer/model.py index e78c4ec..dbb4f05 100644 --- a/src/trainer/model.py +++ b/src/trainer/model.py @@ -437,13 +437,12 @@ class MoEModel(nn.Module): global_step += 1 original_loss = loss.item() * grad_accum_steps batch_loss_sum += original_loss - # 周期性评估(与原代码相同) if ( eval_dataloader is not None and global_step % eval_frequency == 0 ): - avg_loss = batch_loss_sum / global_step + avg_loss = batch_loss_sum / eval_frequency acc, _ = self.model_eval(eval_dataloader, criterion) super().train() if monitor is not None: