From 5dda0e6f85937f8bed540bee4ed57d056b902660 Mon Sep 17 00:00:00 2001
From: songsenand
Date: Wed, 8 Apr 2026 00:21:15 +0800
Subject: [PATCH] =?UTF-8?q?feat(BigExpert):=20=E6=B7=BB=E5=8A=A0=20torch.c?=
 =?UTF-8?q?ompile=20=E6=94=AF=E6=8C=81=E5=B9=B6=E4=BC=98=E5=8C=96=E7=BC=96?=
 =?UTF-8?q?=E8=AF=91=E5=8F=82=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is ignored when the patch is applied):
- BigExpert now always hands compile=False to the parent constructor and
  wraps forward with torch.compile at most once, after self.moe is assigned.
  Previously an explicit compile=... was popped without forcing the parent
  to stay uncompiled, so the parent fell back to its own default.
- The local flag no longer shadows the builtin compile().
- Line breaks, blank lines and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Verify the context and removed
  lines against the tree before applying. The index blob ids below are the
  ones recorded by the original commit.

 big_expert.py      | 32 ++++++++++++++++++++++++++++++++
 src/model/model.py | 11 +++++------
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/big_expert.py b/big_expert.py
index 500a80a..7837e93 100644
--- a/big_expert.py
+++ b/big_expert.py
@@ -1,8 +1,40 @@
+import torch
+
+from model.components import MoELayer
 from model.model import InputMethodEngine
 
 
 class BigExpert(InputMethodEngine):
+    """InputMethodEngine whose ``moe`` is a 40-expert, top-3 ``MoELayer``."""
 
     def __init__(self, *args, **kw):
+        """Build the parent engine, attach the MoE layer, optionally compile.
+
+        Arguments go to ``InputMethodEngine.__init__``, with ``compile`` forced
+        to ``False`` there. ``compile`` (default ``False``) and ``dim``
+        (default 512) are also read here as keywords.
+        """
+        # Always construct the parent uncompiled so ``forward`` is wrapped by
+        # torch.compile once, only after ``self.moe`` has been assigned below.
+        compile_forward = kw.pop("compile", False)
+        kw["compile"] = False
         super().__init__(*args, **kw)
+        # NOTE(review): a positional ``dim`` is not visible here, and 512 is
+        # presumably the parent's default -- confirm against InputMethodEngine.
+        dim = kw.get("dim", 512)
         self.moe = MoELayer(dim=dim, num_experts=40, top_k=3)
+
+        if compile_forward:
+            # ``mode`` is not passed: torch.compile rejects ``mode`` together
+            # with ``options``.
+            self.forward = torch.compile(
+                self.forward,
+                fullgraph=False,
+                dynamic=False,
+                options={
+                    "epilogue_fusion": True,
+                    "max_autotune": True,
+                    "triton.cudagraphs": True,
+                    "reorder_for_compute_comm_overlap": False,
+                },
+            )
diff --git a/src/model/model.py b/src/model/model.py
index ae050e7..f834843 100644
--- a/src/model/model.py
+++ b/src/model/model.py
@@ -82,15 +82,14 @@ class InputMethodEngine(nn.Module):
         if compile:
             self.forward = torch.compile(
                 self.forward,
-                mode="reduce-overhead",
+                # ``mode`` is not passed: torch.compile rejects it with options.
                 fullgraph=False,
                 dynamic=False,
                 options={
-                        "epilogue_fusion": True,
-                        "max_autotune": True,  # 启用自动调优
-                        "triton.cudagraphs": True,
-                        # 尝试控制归约策略
-                        "reorder_for_compute_comm_overlap": False,
+                    "epilogue_fusion": True,
+                    "max_autotune": True,
+                    "triton.cudagraphs": True,
+                    "reorder_for_compute_comm_overlap": False,
                 },
             )
 