fix: update model runner (#5934)
This commit is contained in:
@@ -81,7 +81,6 @@ from sglang.srt.utils import (
|
|||||||
get_available_gpu_memory,
|
get_available_gpu_memory,
|
||||||
get_bool_env_var,
|
get_bool_env_var,
|
||||||
init_custom_process_group,
|
init_custom_process_group,
|
||||||
is_ampere_with_cuda_12_3,
|
|
||||||
is_cuda,
|
is_cuda,
|
||||||
is_fa3_default_architecture,
|
is_fa3_default_architecture,
|
||||||
is_flashinfer_available,
|
is_flashinfer_available,
|
||||||
@@ -264,7 +263,7 @@ class ModelRunner:
|
|||||||
if not self.use_mla_backend:
|
if not self.use_mla_backend:
|
||||||
# MHA architecture
|
# MHA architecture
|
||||||
if (
|
if (
|
||||||
(is_ampere_with_cuda_12_3() or is_hopper_with_cuda_12_3())
|
is_hopper_with_cuda_12_3()
|
||||||
and is_no_spec_infer_or_topk_one(server_args)
|
and is_no_spec_infer_or_topk_one(server_args)
|
||||||
and is_fa3_default_architecture(self.model_config.hf_config)
|
and is_fa3_default_architecture(self.model_config.hf_config)
|
||||||
):
|
):
|
||||||
|
|||||||
Reference in New Issue
Block a user