Rename argument --disable-nan-detection to --enable-nan-detection (#2066)

This commit is contained in:
Lianmin Zheng
2024-11-17 16:53:44 -08:00
committed by GitHub
parent 62832bb272
commit ebaa2f3199
5 changed files with 17 additions and 20 deletions

View File

@@ -139,7 +139,7 @@ class ModelRunner:
"disable_mla": server_args.disable_mla,
"torchao_config": server_args.torchao_config,
"disable_penalizer": server_args.disable_penalizer,
"disable_nan_detection": server_args.disable_nan_detection,
"enable_nan_detection": server_args.enable_nan_detection,
"enable_dp_attention": server_args.enable_dp_attention,
}
)
@@ -276,6 +276,10 @@ class ModelRunner:
else None
)
self.dtype = self.vllm_model_config.dtype
if self.sliding_window_size:
assert (
self.server_args.attention_backend == "flashinfer"
), "Only flashinfer supports window attention."
logger.info(
f"Load weight end. "