From 15c75e41462dfdb6e405bf061ab0640bb04ccdbf Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 11 Sep 2024 04:36:21 -0700 Subject: [PATCH] [Fix] Fix --disable-flashinfer (#1389) --- python/sglang/srt/server_args.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 0881344c0..776f7bec3 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -140,11 +140,13 @@ class ServerArgs: "The option '--disable-flashinfer' will be deprecated in the next release. " "Please use '--attention-backend triton' instead." ) + self.attention_backend = "triton" if self.disable_flashinfer_sampling: logger.warning( "The option '--disable-flashinfer-sampling' will be deprecated in the next release. " "Please use '--sampling-backend pytorch' instead. " ) + self.sampling_backend = "pytorch" # Model-specific patches if "Alibaba-NLP/gte-Qwen2-1.5B-instruct" == self.model_path: