diff --git a/src/trainer/model.py b/src/trainer/model.py index dbb4f05..088d0fa 100644 --- a/src/trainer/model.py +++ b/src/trainer/model.py @@ -399,7 +399,7 @@ class MoEModel(nn.Module): # ---------- 学习率调度(仅当使用默认优化器且未传入自定义调度函数时)---------- if created_optimizer and lr_schedule is None: - if processed_batches <= 1000: + if processed_batches <= 8000: new_lr = 1e-4 else: new_lr = 6e-6 @@ -451,7 +451,7 @@ class MoEModel(nn.Module): {"loss": avg_loss, "acc": acc}, ) logger.info( - f"step: {global_step}, loss: {avg_loss:.4f}, acc: {acc}" + f"step: {global_step}, loss: {avg_loss:.4f}, acc: {acc:.4f}" ) batch_loss_sum = 0.0