Misc fix for min_p_sampling, --cuda-graph-bs (#2761)

This commit is contained in:
Lianmin Zheng
2025-01-07 02:52:53 -08:00
committed by GitHub
parent 6d08ce2aa9
commit bdc1acf6cd
17 changed files with 135 additions and 63 deletions

View File

@@ -148,6 +148,7 @@ class ServerArgs:
enable_torch_compile: bool = False
torch_compile_max_bs: int = 32
cuda_graph_max_bs: Optional[int] = None
cuda_graph_bs: Optional[List[int]] = None
torchao_config: str = ""
enable_nan_detection: bool = False
enable_p2p_check: bool = False
@@ -803,6 +804,12 @@ class ServerArgs:
default=ServerArgs.cuda_graph_max_bs,
help="Set the maximum batch size for cuda graph.",
)
parser.add_argument(
"--cuda-graph-bs",
type=int,
nargs="+",
help="Set the list of batch sizes for cuda graph.",
)
parser.add_argument(
"--torchao-config",
type=str,