Move args from global_config to environ (#11332)
This commit is contained in:
@@ -37,7 +37,6 @@ import copy
|
||||
import dataclasses
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from enum import Enum, auto
|
||||
from http import HTTPStatus
|
||||
@@ -47,7 +46,6 @@ from typing import TYPE_CHECKING, Any, List, Optional, Set, Tuple, Union
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from sglang.global_config import global_config
|
||||
from sglang.srt.constrained.base_grammar_backend import BaseGrammarObject
|
||||
from sglang.srt.disaggregation.base import BaseKVSender
|
||||
from sglang.srt.disaggregation.decode_schedule_batch_mixin import (
|
||||
@@ -55,6 +53,7 @@ from sglang.srt.disaggregation.decode_schedule_batch_mixin import (
|
||||
)
|
||||
from sglang.srt.disaggregation.utils import DisaggregationMode
|
||||
from sglang.srt.distributed.parallel_state import get_tensor_model_parallel_rank
|
||||
from sglang.srt.environ import envs
|
||||
from sglang.srt.mem_cache.allocator import (
|
||||
BaseTokenToKVPoolAllocator,
|
||||
SWATokenToKVPoolAllocator,
|
||||
@@ -1481,7 +1480,8 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
|
||||
total_max_new_tokens = sum(r.sampling_params.max_new_tokens for r in self.reqs)
|
||||
|
||||
new_estimate_ratio = (
|
||||
total_decoded_tokens + global_config.retract_decode_steps * len(self.reqs)
|
||||
total_decoded_tokens
|
||||
+ envs.SGLANG_RETRACT_DECODE_STEPS.get() * len(self.reqs)
|
||||
) / total_max_new_tokens
|
||||
new_estimate_ratio = min(1.0, new_estimate_ratio)
|
||||
|
||||
@@ -1520,7 +1520,7 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
|
||||
self.tree_cache.dec_lock_ref(req.last_node)
|
||||
|
||||
# NOTE(lsyin): we should use the newly evictable memory instantly.
|
||||
num_tokens = remaing_req_count * global_config.retract_decode_steps
|
||||
num_tokens = remaing_req_count * envs.SGLANG_RETRACT_DECODE_STEPS.get()
|
||||
self._evict_tree_cache_if_needed(num_tokens)
|
||||
|
||||
req.reset_for_retract()
|
||||
|
||||
Reference in New Issue
Block a user