Deprecate global_server_args_dict (#11331)
This commit is contained in:
@@ -122,7 +122,6 @@ from sglang.srt.managers.schedule_batch import (
|
||||
Req,
|
||||
RequestStage,
|
||||
ScheduleBatch,
|
||||
global_server_args_dict,
|
||||
)
|
||||
from sglang.srt.managers.schedule_policy import (
|
||||
AddReqResult,
|
||||
@@ -150,7 +149,7 @@ from sglang.srt.mem_cache.radix_cache import RadixCache
|
||||
from sglang.srt.mem_cache.swa_radix_cache import SWARadixCache
|
||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
|
||||
from sglang.srt.parser.reasoning_parser import ReasoningParser
|
||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||
from sglang.srt.server_args import PortArgs, ServerArgs, get_global_server_args
|
||||
from sglang.srt.speculative.eagle_info import EagleDraftInput
|
||||
from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
|
||||
from sglang.srt.tracing.trace import (
|
||||
@@ -447,13 +446,12 @@ class Scheduler(
|
||||
self.max_req_input_len,
|
||||
self.random_seed,
|
||||
self.device,
|
||||
worker_global_server_args_dict,
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
) = self.tp_worker.get_worker_info()
|
||||
if global_server_args_dict["pp_max_micro_batch_size"] is None:
|
||||
global_server_args_dict["pp_max_micro_batch_size"] = max(
|
||||
if get_global_server_args().pp_max_micro_batch_size is None:
|
||||
get_global_server_args().pp_max_micro_batch_size = max(
|
||||
self.max_running_requests // server_args.pp_size, 1
|
||||
)
|
||||
|
||||
@@ -465,7 +463,6 @@ class Scheduler(
|
||||
self.world_group = get_world_group()
|
||||
|
||||
self.pad_input_ids_func = self.tp_worker.get_pad_input_ids_func()
|
||||
global_server_args_dict.update(worker_global_server_args_dict)
|
||||
set_random_seed(self.random_seed)
|
||||
|
||||
# Hybrid memory pool
|
||||
@@ -1866,7 +1863,7 @@ class Scheduler(
|
||||
return ret
|
||||
|
||||
def get_num_allocatable_reqs(self, running_bs):
|
||||
res = global_server_args_dict["pp_max_micro_batch_size"] - running_bs
|
||||
res = get_global_server_args().pp_max_micro_batch_size - running_bs
|
||||
if self.pp_size > 1:
|
||||
res = min(res, self.req_to_token_pool.available_size())
|
||||
return res
|
||||
@@ -2610,7 +2607,7 @@ class Scheduler(
|
||||
)
|
||||
|
||||
def get_internal_state(self, recv_req: GetInternalStateReq):
|
||||
ret = dict(global_server_args_dict)
|
||||
ret = vars(get_global_server_args())
|
||||
ret["last_gen_throughput"] = self.last_gen_throughput
|
||||
ret["memory_usage"] = {
|
||||
"weight": round(
|
||||
@@ -2666,11 +2663,11 @@ class Scheduler(
|
||||
logger.info(f"{avg_spec_accept_length=}")
|
||||
self.cum_spec_accept_length = self.cum_spec_accept_count = 0
|
||||
for k, v in server_args_dict.items():
|
||||
global_server_args_dict[k] = v
|
||||
logger.info(f"Global server args updated! {global_server_args_dict=}")
|
||||
setattr(get_global_server_args(), k, v)
|
||||
logger.info(f"Global server args updated! {get_global_server_args()=}")
|
||||
return SetInternalStateReqOutput(
|
||||
updated=True,
|
||||
server_args=global_server_args_dict,
|
||||
server_args=vars(get_global_server_args()),
|
||||
)
|
||||
|
||||
def handle_rpc_request(self, recv_req: RpcReqInput):
|
||||
|
||||
Reference in New Issue
Block a user