diff --git a/src/trainer/model.py b/src/trainer/model.py index 8183dcc..0852548 100644 --- a/src/trainer/model.py +++ b/src/trainer/model.py @@ -7,6 +7,8 @@ from modelscope import AutoModel import pickle from importlib.resources import files +from loguru import logger + from tqdm import tqdm from .monitor import TrainingMonitor @@ -426,16 +428,20 @@ class MoEModel(nn.Module): and global_step % eval_frequency == 0 ): acc, _ = self.model_eval(eval_dataloader, criterion) + super().train() if monitor is not None: monitor.add_step( global_step, {"loss": loss.item() * grad_accum_steps, "acc": acc}, ) + logger.info({"loss": loss.item() * grad_accum_steps, "acc": acc}) + elif monitor is not None: # 仅记录训练损失 monitor.add_step( global_step, {"loss": loss.item() * grad_accum_steps} ) + logger.info({"loss": loss.item() * grad_accum_steps}) # ============================ 使用示例 ============================