[NVIDIA] FA3/FA4 Fix (#11606)
Co-authored-by: Baizhou Zhang <sobereddiezhang@gmail.com>
This commit is contained in:
@@ -1071,6 +1071,16 @@ class ServerArgs:
|
||||
self.enable_mixed_chunk = False
|
||||
self.disable_radix_cache = True
|
||||
|
||||
if self.attention_backend == "fa4" or self.decode_attention_backend == "fa4":
|
||||
raise ValueError(
|
||||
"FA4 backend is only supported for prefill. Please use `--prefill-attention-backend fa4` instead."
|
||||
)
|
||||
if self.prefill_attention_backend == "fa4":
|
||||
logger.warning(
|
||||
f"FA4 backend only supports page size 128, changing page_size from {self.page_size} to 128."
|
||||
)
|
||||
self.page_size = 128
|
||||
|
||||
def _handle_page_size(self):
|
||||
if self.page_size is None:
|
||||
self.page_size = 1
|
||||
|
||||
@@ -129,6 +129,11 @@ def is_in_amd_ci():
|
||||
return get_bool_env_var("SGLANG_IS_IN_CI_AMD")
|
||||
|
||||
|
||||
def is_blackwell_system():
    """Tell whether the ``IS_BLACKWELL`` flag marks this as a Blackwell (B200) system.

    The answer comes solely from the ``IS_BLACKWELL`` environment variable,
    read through ``get_bool_env_var``; no hardware probing happens here.
    """
    on_blackwell = get_bool_env_var("IS_BLACKWELL")
    return on_blackwell
|
||||
|
||||
|
||||
def _use_cached_default_models(model_repo: str):
|
||||
cache_dir = os.getenv("DEFAULT_MODEL_CACHE_DIR")
|
||||
if cache_dir and model_repo:
|
||||
@@ -151,6 +156,9 @@ DEFAULT_URL_FOR_TEST = f"http://127.0.0.1:{DEFAULT_PORT_FOR_SRT_TEST_RUNNER + 10
|
||||
# Set the server launch timeout to 3000 when running in AMD CI or on a
# Blackwell system. The list literal evaluates both checks before `any`
# runs, so neither environment lookup is short-circuited.
if any([is_in_amd_ci(), is_blackwell_system()]):
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH = 3000
|
||||
|
||||
|
||||
def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
|
||||
assert url is not None
|
||||
|
||||
Reference in New Issue
Block a user