From 335540d8c2cbe84bb9e0897fde2134d35be5e82d Mon Sep 17 00:00:00 2001 From: songsenand Date: Fri, 13 Feb 2026 12:12:12 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4=E5=AD=A6=E4=B9=A0=E7=8E=87?= =?UTF-8?q?=E9=98=88=E5=80=BC=E5=B9=B6=E4=BC=98=E5=8C=96=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E8=BE=93=E5=87=BA=E7=B2=BE=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/trainer/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/trainer/model.py b/src/trainer/model.py index dbb4f05..088d0fa 100644 --- a/src/trainer/model.py +++ b/src/trainer/model.py @@ -399,7 +399,7 @@ class MoEModel(nn.Module): # ---------- 学习率调度(仅当使用默认优化器且未传入自定义调度函数时)---------- if created_optimizer and lr_schedule is None: - if processed_batches <= 1000: + if processed_batches <= 8000: new_lr = 1e-4 else: new_lr = 6e-6 @@ -451,7 +451,7 @@ class MoEModel(nn.Module): {"loss": avg_loss, "acc": acc}, ) logger.info( - f"step: {global_step}, loss: {avg_loss:.4f}, acc: {acc}" + f"step: {global_step}, loss: {avg_loss:.4f}, acc: {acc:.4f}" ) batch_loss_sum = 0.0