Improve code styles (#4021)
This commit is contained in:
@@ -198,8 +198,6 @@ class DataParallelController:
|
||||
self.max_total_num_tokens = scheduler_info[0]["max_total_num_tokens"]
|
||||
self.max_req_input_len = scheduler_info[0]["max_req_input_len"]
|
||||
|
||||
print(f"{scheduler_info=}")
|
||||
|
||||
def round_robin_scheduler(self, req):
    """Dispatch `req` to the next worker in round-robin order.

    Sends the request object over the currently selected worker socket,
    then advances the counter, wrapping back to 0 after the last worker.
    """
    target = self.workers[self.round_robin_counter]
    target.send_pyobj(req)
    advanced = self.round_robin_counter + 1
    self.round_robin_counter = advanced % len(self.workers)
|
||||
@@ -222,7 +220,6 @@ class DataParallelController:
|
||||
TokenizedEmbeddingReqInput,
|
||||
),
|
||||
):
|
||||
logger.info("dispatching")
|
||||
self.dispatching(recv_req)
|
||||
else:
|
||||
# Send other control messages to first worker of tp group
|
||||
|
||||
@@ -158,7 +158,7 @@ class GenerateReqInput:
|
||||
# Expand parallel_sample_num
|
||||
num = self.batch_size * self.parallel_sample_num
|
||||
|
||||
if self.image_data is None:
|
||||
if not self.image_data:
|
||||
self.image_data = [None] * num
|
||||
elif not isinstance(self.image_data, list):
|
||||
self.image_data = [self.image_data] * num
|
||||
|
||||
@@ -282,6 +282,8 @@ class Req:
|
||||
# If we want to abort the request in the middle of the event loop, set this to true
|
||||
# Note: We should never set finished_reason in the middle, the req will get filtered and never respond
|
||||
self.to_abort = False
|
||||
# This carries the error message for `.to_abort` and will be attached to the finished_reason at the end of the event loop
|
||||
self.to_abort_message: str = "Unknown error"
|
||||
self.stream = stream
|
||||
self.eos_token_ids = eos_token_ids
|
||||
|
||||
@@ -359,8 +361,6 @@ class Req:
|
||||
# The tokens is prefilled but need to be considered as decode tokens
|
||||
# and should be updated for the decode logprobs
|
||||
self.last_update_decode_tokens = 0
|
||||
# The relative logprob_start_len in an extend batch
|
||||
self.extend_logprob_start_len = 0
|
||||
|
||||
# Embedding (return values)
|
||||
self.embedding = None
|
||||
@@ -377,9 +377,6 @@ class Req:
|
||||
self.spec_verify_ct = 0
|
||||
self.lora_path = lora_path
|
||||
|
||||
# This carries the error message for `.to_abort` and will be attached to the finished_reason at the end of the event loop
|
||||
self.to_abort_message: str = "Unknown error"
|
||||
|
||||
@property
def seqlen(self):
    """Total sequence length: prompt tokens plus tokens generated so far."""
    prompt_len = len(self.origin_input_ids)
    generated_len = len(self.output_ids)
    return prompt_len + generated_len
|
||||
|
||||
@@ -358,7 +358,6 @@ class Scheduler:
|
||||
self.cum_spec_accept_count = 0
|
||||
self.last_decode_stats_tic = time.time()
|
||||
self.return_health_check_ct = 0
|
||||
self.stream_interval = server_args.stream_interval
|
||||
self.current_stream = torch.get_device_module(self.device).current_stream()
|
||||
if self.device == "cpu":
|
||||
self.current_stream.synchronize = lambda: None # No-op for CPU
|
||||
@@ -444,11 +443,6 @@ class Scheduler:
|
||||
},
|
||||
)
|
||||
|
||||
# The largest prefill length of a single request
|
||||
self._largest_prefill_len: int = 0
|
||||
# The largest context length (prefill + generation) of a single request
|
||||
self._largest_prefill_decode_len: int = 0
|
||||
|
||||
# Init request dispatcher
|
||||
self._request_dispatcher = TypeBasedDispatcher(
|
||||
[
|
||||
@@ -2309,8 +2303,6 @@ def run_scheduler_process(
|
||||
if get_bool_env_var("SGLANG_SET_CPU_AFFINITY"):
|
||||
set_gpu_proc_affinity(server_args.tp_size, server_args.nnodes, gpu_id)
|
||||
|
||||
parent_process = psutil.Process().parent()
|
||||
|
||||
# Create a scheduler and run the event loop
|
||||
try:
|
||||
scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
|
||||
|
||||
Reference in New Issue
Block a user