[FIX] Fix the nightly CI by disabling the hybrid SWA memory pool for Gemma2 (#8693)
This commit is contained in:
@@ -1928,6 +1928,12 @@ class ServerArgs:
|
|||||||
if "Llama4" in model_arch:
|
if "Llama4" in model_arch:
|
||||||
assert self.attention_backend == "fa3", "fa3 is required for Llama4 model"
|
assert self.attention_backend == "fa3", "fa3 is required for Llama4 model"
|
||||||
|
|
||||||
|
if "Gemma2ForCausalLM" in model_arch:
|
||||||
|
# FIXME: https://github.com/sgl-project/sglang/pull/7367 is not compatible with the Gemma2 model.
|
||||||
|
# It failed at this test: https://github.com/sgl-project/sglang/actions/runs/16255155597/job/45890331952#step:4:736
|
||||||
|
logger.warning("Disable hybrid SWA memory for Gemma2ForCausalLM.")
|
||||||
|
self.disable_hybrid_swa_memory = True
|
||||||
|
|
||||||
# Check LoRA
|
# Check LoRA
|
||||||
self.check_lora_server_args()
|
self.check_lora_server_args()
|
||||||
|
|
||||||
|
|||||||
@@ -27,9 +27,6 @@ import torch.nn.functional as F
|
|||||||
|
|
||||||
from sglang.bench_serving import run_benchmark
|
from sglang.bench_serving import run_benchmark
|
||||||
from sglang.global_config import global_config
|
from sglang.global_config import global_config
|
||||||
from sglang.lang.backend.openai import OpenAI
|
|
||||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
|
||||||
from sglang.lang.interpreter import ProgramState
|
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
get_bool_env_var,
|
get_bool_env_var,
|
||||||
get_device,
|
get_device,
|
||||||
@@ -358,6 +355,9 @@ def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
|
|||||||
|
|
||||||
|
|
||||||
def select_sglang_backend(args: argparse.Namespace):
|
def select_sglang_backend(args: argparse.Namespace):
|
||||||
|
from sglang.lang.backend.openai import OpenAI
|
||||||
|
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||||
|
|
||||||
if args.backend.startswith("srt"):
|
if args.backend.startswith("srt"):
|
||||||
if args.backend == "srt-no-parallel":
|
if args.backend == "srt-no-parallel":
|
||||||
global_config.enable_parallel_encoding = False
|
global_config.enable_parallel_encoding = False
|
||||||
|
|||||||
Reference in New Issue
Block a user