[CI]Fixed the spell check function in typos.toml (#6753)
### What this PR does / why we need it?
The incorrect regular expression syntax `.*[UE4M3|ue4m3].*` is a character
class, not an alternation: it actually ignores all identifiers containing any
of the following characters: `U`, `E`, `4`, `M`, `3`, `|`, `u`, `e`, `m`
```yaml
extend-ignore-identifiers-re = [".*Unc.*", ".*_thw",
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
".*ot.*", ".*[Tt]h[rR].*"]
```
===fix===>
```yaml
extend-ignore-identifiers-re = [".*Unc.*", ".*_thw",
".*UE8M0.*", ".*(UE4M3|ue4m3).*", ".*eles.*", ".*fo.*", ".*ba.*",
".*ot.*", ".*[Tt]h[rR].*"]
```
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.15.0
- vLLM main:
9562912cea
Signed-off-by: MrZ20 <2609716663@qq.com>
This commit is contained in:
@@ -178,7 +178,7 @@ class ProxyState:
|
||||
# No lock needed - atomic operation
|
||||
self.prefillers[server_idx].aborted_requests.add(request_id)
|
||||
|
||||
def aquire_aborted_prefiller_requests(self, server_idx: int): # Changed to synchronous
|
||||
def acquire_aborted_prefiller_requests(self, server_idx: int): # Changed to synchronous
|
||||
"""
|
||||
Get the set of aborted requests and clear it.
|
||||
This is used to release kv cache in prefiller node.
|
||||
@@ -325,7 +325,7 @@ async def send_request_to_service(
|
||||
max_retries: int = 3,
|
||||
base_delay: float = 0.2,
|
||||
):
|
||||
proxy_state.aquire_aborted_prefiller_requests(prefiller_id)
|
||||
proxy_state.acquire_aborted_prefiller_requests(prefiller_id)
|
||||
req_data = req_data.copy()
|
||||
req_data["stream"] = False
|
||||
req_data["max_tokens"] = 1
|
||||
|
||||
@@ -241,7 +241,7 @@ class ProxyState:
|
||||
return
|
||||
self.prefillers[server_idx].aborted_requests.add(request_id)
|
||||
|
||||
def aquire_aborted_prefiller_requests(self, server_idx: int): # Changed to synchronous
|
||||
def acquire_aborted_prefiller_requests(self, server_idx: int): # Changed to synchronous
|
||||
"""
|
||||
Get the set of aborted requests and clear it.
|
||||
This is used to release kv cache in prefiller node.
|
||||
@@ -582,7 +582,7 @@ async def send_request_to_service(
|
||||
max_retries: int = 3,
|
||||
base_delay: float = 0.2,
|
||||
):
|
||||
aborted_requests = proxy_state.aquire_aborted_prefiller_requests(prefiller_id)
|
||||
aborted_requests = proxy_state.acquire_aborted_prefiller_requests(prefiller_id)
|
||||
req_data = req_data.copy()
|
||||
req_data["kv_transfer_params"] = {
|
||||
"do_remote_decode": True,
|
||||
|
||||
@@ -59,7 +59,7 @@ def calculate_average(lst):
|
||||
return total / count
|
||||
|
||||
|
||||
def layer_imblance_polt(y_list, label_names, device_num, output_path, file_name):
|
||||
def layer_imbalance_plot(y_list, label_names, device_num, output_path, file_name):
|
||||
plt.rcParams["font.sans-serif"] = ["Arial"]
|
||||
plt.rcParams["axes.unicode_minus"] = False
|
||||
x = [i for i in range(58)]
|
||||
@@ -160,4 +160,4 @@ if __name__ == "__main__":
|
||||
save_matrix_to_json(output_path, file_name, np.array(global_deployment))
|
||||
label_names = ["default deployment max load", "balanced load max load", "balanced load avg load"]
|
||||
new_file_name = f"{exp_name}_{num_devices}_{num_redundancy_expert}.png"
|
||||
layer_imblance_polt(y_list, label_names, num_devices, output_path, new_file_name)
|
||||
layer_imbalance_plot(y_list, label_names, num_devices, output_path, new_file_name)
|
||||
|
||||
@@ -283,10 +283,10 @@ async def _select_instance(api: str, req_data: Any, request_length: int):
|
||||
request_id = await proxy_state.next_req_id()
|
||||
# Select dp server based on priority score
|
||||
server_idx = proxy_state.select_server(priority_score)
|
||||
choosen_server = proxy_state.dp_servers[server_idx]
|
||||
logger.debug(f"Choose server {choosen_server.url} to process request {request_id}")
|
||||
chosen_server = proxy_state.dp_servers[server_idx]
|
||||
logger.debug(f"Choose server {chosen_server.url} to process request {request_id}")
|
||||
return InstanceInfo(
|
||||
request_id=request_id, server_idx=server_idx, priority_score=priority_score, server_state=choosen_server
|
||||
request_id=request_id, server_idx=server_idx, priority_score=priority_score, server_state=chosen_server
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -29,11 +29,11 @@ dp_rpc_port = args.dp_rpc_port
|
||||
vllm_start_port = args.vllm_start_port
|
||||
|
||||
|
||||
def run_command(visiable_devices, dp_rank, vllm_engine_port):
|
||||
def run_command(visible_devices, dp_rank, vllm_engine_port):
|
||||
command = [
|
||||
"bash",
|
||||
"./run_dp_template.sh",
|
||||
visiable_devices,
|
||||
visible_devices,
|
||||
str(vllm_engine_port),
|
||||
str(dp_size),
|
||||
str(dp_rank),
|
||||
@@ -55,8 +55,8 @@ if __name__ == "__main__":
|
||||
for i in range(dp_size_local):
|
||||
dp_rank = dp_rank_start + i
|
||||
vllm_engine_port = vllm_start_port + i
|
||||
visiable_devices = ",".join(str(x) for x in range(i * tp_size, (i + 1) * tp_size))
|
||||
process = multiprocessing.Process(target=run_command, args=(visiable_devices, dp_rank, vllm_engine_port))
|
||||
visible_devices = ",".join(str(x) for x in range(i * tp_size, (i + 1) * tp_size))
|
||||
process = multiprocessing.Process(target=run_command, args=(visible_devices, dp_rank, vllm_engine_port))
|
||||
processes.append(process)
|
||||
process.start()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user