From 5dda0e6f85937f8bed540bee4ed57d056b902660 Mon Sep 17 00:00:00 2001
From: songsenand <songsenand@163.com>
Date: Wed, 8 Apr 2026 00:21:15 +0800
Subject: [PATCH] =?UTF-8?q?feat(BigExpert):=20=E6=B7=BB=E5=8A=A0=20torch.c?=
 =?UTF-8?q?ompile=20=E6=94=AF=E6=8C=81=E5=B9=B6=E4=BC=98=E5=8C=96=E7=BC=96?=
 =?UTF-8?q?=E8=AF=91=E5=8F=82=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 big_expert.py      | 27 +++++++++++++++++++++++++++
 src/model/model.py | 11 +++++------
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/big_expert.py b/big_expert.py
index 500a80a..7837e93 100644
--- a/big_expert.py
+++ b/big_expert.py
@@ -1,8 +1,35 @@
+import torch
+
+from model.components import MoELayer
 from model.model import InputMethodEngine
 
 
 class BigExpert(InputMethodEngine):
+    """InputMethodEngine subclass that adds a MoELayer (40 experts, top_k=3)."""
+
     def __init__(self, *args, **kw):
+        """Init the parent uncompiled, attach self.moe, then honour ``compile``."""
+        # The parent is always built with compile=False; torch.compile is
+        # applied below instead, once self.moe has been attached.
+        use_compile = kw.pop("compile", False)
+        kw["compile"] = False
         super().__init__(*args, **kw)
+        # "dim" is only picked up as a keyword argument; otherwise 512 is used.
+        dim = kw.get("dim", 512)
 
         self.moe = MoELayer(dim=dim, num_experts=40, top_k=3)
+
+        if use_compile:
+            # No mode= here: torch.compile does not accept mode and options
+            # together, and the tuning is expressed through options.
+            self.forward = torch.compile(
+                self.forward,
+                fullgraph=False,
+                dynamic=False,
+                options={
+                    "epilogue_fusion": True,
+                    "max_autotune": True,
+                    "triton.cudagraphs": True,
+                    "reorder_for_compute_comm_overlap": False,
+                },
+            )
diff --git a/src/model/model.py b/src/model/model.py
index ae050e7..f834843 100644
--- a/src/model/model.py
+++ b/src/model/model.py
@@ -82,15 +82,14 @@ class InputMethodEngine(nn.Module):
         if compile:
             self.forward = torch.compile(
                 self.forward,
-                mode="reduce-overhead",
+                # mode="reduce-overhead",
                 fullgraph=False,
                 dynamic=False,
                 options={
-                    "epilogue_fusion": True,
-                    "max_autotune": True,  # 启用自动调优
-                    "triton.cudagraphs": True,
-                    # 尝试控制归约策略
-                    "reorder_for_compute_comm_overlap": False,
+                   "epilogue_fusion": True,
+                   "max_autotune": True,
+                   "triton.cudagraphs": True,
+                   "reorder_for_compute_comm_overlap": False,
                 },
             )