Support Deepseek MoE Model (#689)

This commit is contained in:
Liangsheng Yin
2024-07-21 03:09:29 -07:00
committed by GitHub
parent 5a4ef2b5c8
commit eedc12e12e
5 changed files with 519 additions and 23 deletions

View File

@@ -167,7 +167,7 @@ def _set_torch_compile_config():
torch._inductor.config.fx_graph_cache = True # Experimental feature to reduce compilation times, will be on by default in future
# FIXME: tmp workaround
-torch._dynamo.config.accumulated_cache_size_limit = 128
+torch._dynamo.config.accumulated_cache_size_limit = 256
def launch_server(