Use the environment variable SGLANG_SET_CPU_AFFINITY to set CPU affinity; turn it off by default (#2222)

This commit is contained in:
Lianmin Zheng
2024-11-27 02:52:46 -08:00
committed by GitHub
parent 6997e28f6e
commit fb6e04a0c2
6 changed files with 23 additions and 20 deletions

View File

@@ -14,13 +14,13 @@
import json
import logging
import os
from enum import IntEnum, auto
from typing import List, Optional
from transformers import PretrainedConfig
from sglang.srt.hf_transformers_utils import get_config, get_context_length
from sglang.srt.utils import get_bool_env_var
logger = logging.getLogger(__name__)
@@ -59,13 +59,9 @@ class ModelConfig:
# Derive context length
derived_context_len = get_context_length(self.hf_text_config)
allow_long_context = os.environ.get(
"SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN", None
)
if context_length is not None:
if context_length > derived_context_len:
if allow_long_context:
if get_bool_env_var("SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN"):
logger.warning(
f"Warning: User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
f"This may lead to incorrect model outputs or CUDA errors."