From 71ef54e3d400e85f777a8e58efcf79a93ab4ac7b Mon Sep 17 00:00:00 2001 From: songsenand Date: Fri, 15 May 2026 14:47:31 +0800 Subject: [PATCH] =?UTF-8?q?fix(trainer):=20=E4=BD=BF=E7=94=A8=E5=9B=BA?= =?UTF-8?q?=E5=AE=9A=E6=9C=80=E5=A4=A7=E5=BA=8F=E5=88=97=E9=95=BF=E5=BA=A6?= =?UTF-8?q?=E7=9A=84collate=E5=87=BD=E6=95=B0=E4=BB=A5=E9=81=BF=E5=85=8D?= =?UTF-8?q?=E5=86=85=E5=AD=98=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/model/trainer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/model/trainer.py b/src/model/trainer.py index cbea273..f20ef98 100644 --- a/src/model/trainer.py +++ b/src/model/trainer.py @@ -1100,13 +1100,14 @@ def create_dataloader( ) logger.info(f"📊 使用标准DataLoader,worker数量: {num_workers}") + fixed_max_seq_length = getattr(dataset, "max_seq_length", 128) dataloader = DataLoader( dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory, worker_init_fn=worker_init_fn, - collate_fn=collate_fn, + collate_fn=preprocess_collate_fn(fixed_max_seq_length), prefetch_factor=2, # 减少预取以避免内存问题 persistent_workers=True, shuffle=shuffle,