Remove --modelopt-config in server_args (#2758)
This commit is contained in:
@@ -150,7 +150,6 @@ class ModelRunner:
|
|||||||
"enable_nan_detection": server_args.enable_nan_detection,
|
"enable_nan_detection": server_args.enable_nan_detection,
|
||||||
"enable_dp_attention": server_args.enable_dp_attention,
|
"enable_dp_attention": server_args.enable_dp_attention,
|
||||||
"enable_ep_moe": server_args.enable_ep_moe,
|
"enable_ep_moe": server_args.enable_ep_moe,
|
||||||
"modelopt_config": server_args.modelopt_config,
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -149,7 +149,6 @@ class ServerArgs:
|
|||||||
torch_compile_max_bs: int = 32
|
torch_compile_max_bs: int = 32
|
||||||
cuda_graph_max_bs: Optional[int] = None
|
cuda_graph_max_bs: Optional[int] = None
|
||||||
torchao_config: str = ""
|
torchao_config: str = ""
|
||||||
modelopt_config: str = ""
|
|
||||||
enable_nan_detection: bool = False
|
enable_nan_detection: bool = False
|
||||||
enable_p2p_check: bool = False
|
enable_p2p_check: bool = False
|
||||||
triton_attention_reduce_in_fp32: bool = False
|
triton_attention_reduce_in_fp32: bool = False
|
||||||
@@ -810,12 +809,6 @@ class ServerArgs:
|
|||||||
default=ServerArgs.torchao_config,
|
default=ServerArgs.torchao_config,
|
||||||
help="Optimize the model with torchao. Experimental feature. Current choices are: int8dq, int8wo, int4wo-<group_size>, fp8wo, fp8dq-per_tensor, fp8dq-per_row",
|
help="Optimize the model with torchao. Experimental feature. Current choices are: int8dq, int8wo, int4wo-<group_size>, fp8wo, fp8dq-per_tensor, fp8dq-per_row",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--modelopt-config",
|
|
||||||
type=str,
|
|
||||||
default=ServerArgs.modelopt_config,
|
|
||||||
help="Optimize the model with nvidia-modelopt. Experimental feature. Current choices are: fp8",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--enable-nan-detection",
|
"--enable-nan-detection",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
|
|||||||
Reference in New Issue
Block a user