diff --git a/python/sglang/srt/layers/torchao_utils.py b/python/sglang/srt/layers/torchao_utils.py index 46b082401..9395cdf27 100644 --- a/python/sglang/srt/layers/torchao_utils.py +++ b/python/sglang/srt/layers/torchao_utils.py @@ -62,6 +62,8 @@ def torchao_quantize_param_data(param: torch.Tensor, torchao_config: str): granularity=GRANULARITY_MAP[granularity] ), ) + else: + raise ValueError(f"Unexpected config: {torchao_config}") return dummy_linear.weight diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 4a1cad89e..5487f772f 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -661,7 +661,7 @@ class ServerArgs: "--torchao-config", type=str, default=ServerArgs.torchao_config, - help="Optimize the model with torchao. Experimental feature. Current choices are: int8dq, int8wo, int4wo-, fp8wo", + help="Optimize the model with torchao. Experimental feature. Current choices are: int8dq, int8wo, int4wo-, fp8wo, fp8dq-per_tensor, fp8dq-per_row", ) parser.add_argument( "--enable-nan-detection",