Misc fix for min_p_sampling, --cuda-graph-bs (#2761)
This commit is contained in:
@@ -148,6 +148,7 @@ class ServerArgs:
|
||||
enable_torch_compile: bool = False
|
||||
torch_compile_max_bs: int = 32
|
||||
cuda_graph_max_bs: Optional[int] = None
|
||||
cuda_graph_bs: Optional[List[int]] = None
|
||||
torchao_config: str = ""
|
||||
enable_nan_detection: bool = False
|
||||
enable_p2p_check: bool = False
|
||||
@@ -803,6 +804,12 @@ class ServerArgs:
|
||||
default=ServerArgs.cuda_graph_max_bs,
|
||||
help="Set the maximum batch size for cuda graph.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cuda-graph-bs",
|
||||
type=int,
|
||||
nargs="+",
|
||||
help="Set the list of batch sizes for cuda graph.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--torchao-config",
|
||||
type=str,
|
||||
|
||||
Reference in New Issue
Block a user