diff --git a/big_expert.py b/big_expert.py
index 500a80a..7837e93 100644
--- a/big_expert.py
+++ b/big_expert.py
@@ -1,8 +1,29 @@
+import torch
+
+from model.components import MoELayer
 from model.model import InputMethodEngine
 
 
 class BigExpert(InputMethodEngine):
     def __init__(self, *args, **kw):
+        # Pop the compile flag and force it off for the parent: the base
+        # engine must not wrap its own forward, because we compile the
+        # finished forward (base engine + MoE layer) below instead.
+        do_compile = kw.pop("compile", False)
+        kw["compile"] = False
         super().__init__(*args, **kw)
-        self.moe = MoELayer(dim=dim, num_experts=40, top_k=3)
+        self.moe = MoELayer(dim=kw.get("dim", 512), num_experts=40, top_k=3)
+
+        if do_compile:
+            self.forward = torch.compile(
+                self.forward,
+                # mode="reduce-overhead" is intentionally disabled: it enables
+                # cudagraphs implicitly, which conflicts with setting
+                # "triton.cudagraphs" explicitly in options below.
+                fullgraph=False,
+                dynamic=False,
+                options={
+                    "epilogue_fusion": True,
+                    "max_autotune": True,
+                    "triton.cudagraphs": True,
+                    "reorder_for_compute_comm_overlap": False,
+                },
+            )
 
diff --git a/src/model/model.py b/src/model/model.py
index ae050e7..f834843 100644
--- a/src/model/model.py
+++ b/src/model/model.py
@@ -82,15 +82,14 @@ class InputMethodEngine(nn.Module):
         if compile:
             self.forward = torch.compile(
                 self.forward,
-                mode="reduce-overhead",
+                # mode="reduce-overhead",
                 fullgraph=False,
                 dynamic=False,
                 options={
-                    "epilogue_fusion": True,
-                    "max_autotune": True,  # enable autotuning
-                    "triton.cudagraphs": True,
-                    # try to control the reduction strategy
-                    "reorder_for_compute_comm_overlap": False,
+                    "epilogue_fusion": True,
+                    "max_autotune": True,
+                    "triton.cudagraphs": True,
+                    "reorder_for_compute_comm_overlap": False,
                 },
             )
 