[Fix] Fix --disable-flashinfer (#1389)
This commit is contained in:
@@ -140,11 +140,13 @@ class ServerArgs:
|
|||||||
"The option '--disable-flashinfer' will be deprecated in the next release. "
|
"The option '--disable-flashinfer' will be deprecated in the next release. "
|
||||||
"Please use '--attention-backend triton' instead."
|
"Please use '--attention-backend triton' instead."
|
||||||
)
|
)
|
||||||
|
self.attention_backend = "triton"
|
||||||
if self.disable_flashinfer_sampling:
|
if self.disable_flashinfer_sampling:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"The option '--disable-flashinfer-sampling' will be deprecated in the next release. "
|
"The option '--disable-flashinfer-sampling' will be deprecated in the next release. "
|
||||||
"Please use '--sampling-backend pytorch' instead. "
|
"Please use '--sampling-backend pytorch' instead. "
|
||||||
)
|
)
|
||||||
|
self.sampling_backend = "pytorch"
|
||||||
|
|
||||||
# Model-specific patches
|
# Model-specific patches
|
||||||
if "Alibaba-NLP/gte-Qwen2-1.5B-instruct" == self.model_path:
|
if "Alibaba-NLP/gte-Qwen2-1.5B-instruct" == self.model_path:
|
||||||
|
|||||||
Reference in New Issue
Block a user