Use an env var SGLANG_SET_CPU_AFFINITY to set cpu affinity; turn it off by default (#2222)
This commit is contained in:
@@ -14,13 +14,13 @@
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from enum import IntEnum, auto
|
||||
from typing import List, Optional
|
||||
|
||||
from transformers import PretrainedConfig
|
||||
|
||||
from sglang.srt.hf_transformers_utils import get_config, get_context_length
|
||||
from sglang.srt.utils import get_bool_env_var
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -59,13 +59,9 @@ class ModelConfig:
|
||||
|
||||
# Derive context length
|
||||
derived_context_len = get_context_length(self.hf_text_config)
|
||||
allow_long_context = os.environ.get(
|
||||
"SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN", None
|
||||
)
|
||||
|
||||
if context_length is not None:
|
||||
if context_length > derived_context_len:
|
||||
if allow_long_context:
|
||||
if get_bool_env_var("SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN"):
|
||||
logger.warning(
|
||||
f"Warning: User-specified context_length ({context_length}) is greater than the derived context_length ({derived_context_len}). "
|
||||
f"This may lead to incorrect model outputs or CUDA errors."
|
||||
|
||||
Reference in New Issue
Block a user