Small refactor of DeepEPMode to clean up the code a bit (#4992)

This commit is contained in:
fzyzcjy
2025-04-03 17:56:44 +08:00
committed by GitHub
parent e8999b13b7
commit 8e10fec9a8
5 changed files with 44 additions and 30 deletions

View File

@@ -20,7 +20,7 @@ import logging
import os
import random
import tempfile
from typing import List, Optional
from typing import List, Literal, Optional
from sglang.srt.hf_transformers_utils import check_gguf_file
from sglang.srt.reasoning_parser import ReasoningParser
@@ -161,7 +161,7 @@ class ServerArgs:
enable_dp_attention: bool = False
enable_ep_moe: bool = False
enable_deepep_moe: bool = False
deepep_mode: Optional[str] = "auto"
deepep_mode: Optional[Literal["auto", "normal", "low_latency"]] = "auto"
enable_torch_compile: bool = False
torch_compile_max_bs: int = 32
cuda_graph_max_bs: Optional[int] = None