[bugfix_v0.11.0] Remove tokenizer usage from layerwise_proxy (#3913)

### What this PR does / why we need it?
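Remove the tokenizer from layerwise_proxy: drop the `AutoTokenizer` import, the `ProxyState.tokenizer` attribute, and the `--tokenizer-dir` argument. Also change the fallback value passed for `use_ascend_direct` in `MooncakeLayerwiseConnectorWorker` from `True` to `False`.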
### Does this PR introduce _any_ user-facing change?
No, apart from the removal of the proxy script's `--tokenizer-dir` command-line option.
### How was this patch tested?
Covered by CI.

Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com>
This commit is contained in:
wangxiaoteng888
2025-10-30 23:55:04 +08:00
committed by GitHub
parent af7a56550b
commit 38afd2c9cb
2 changed files with 1 additions and 8 deletions

View File

@@ -98,7 +98,6 @@ from typing import List
import httpx import httpx
from fastapi import FastAPI, Request from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse from fastapi.responses import StreamingResponse
from transformers import AutoTokenizer
from vllm.logger import init_logger from vllm.logger import init_logger
logger = init_logger(__name__) logger = init_logger(__name__)
@@ -153,8 +152,6 @@ class ProxyState:
heapq.heapify(self.decoder_heap) heapq.heapify(self.decoder_heap)
self.req_id_future = {} self.req_id_future = {}
self.req_data_dict = {} self.req_data_dict = {}
self.tokenizer = AutoTokenizer.from_pretrained(
global_args.tokenizer_dir)
def _update_prefiller_priority(self, server_idx: int): def _update_prefiller_priority(self, server_idx: int):
"""Update the priority of a prefiller server in the heap.""" """Update the priority of a prefiller server in the heap."""
@@ -281,10 +278,6 @@ def parse_args():
nargs="+", nargs="+",
default=["localhost"]) default=["localhost"])
parser.add_argument("--decoder-ports", type=int, nargs="+", default=[8002]) parser.add_argument("--decoder-ports", type=int, nargs="+", default=[8002])
parser.add_argument("--tokenizer-dir",
type=str,
default="/mnt/weight/Qwen3-235B-A22B-W8A8",
help="Maximum number of retries for HTTP requests")
parser.add_argument("--max-retries", parser.add_argument("--max-retries",
type=int, type=int,
default=3, default=3,

View File

@@ -656,7 +656,7 @@ class MooncakeLayerwiseConnectorWorker:
self.device_id = device_ids[self.tp_rank] # type: ignore self.device_id = device_ids[self.tp_rank] # type: ignore
if vllm_config.kv_transfer_config.get_from_extra_config( if vllm_config.kv_transfer_config.get_from_extra_config(
'use_ascend_direct', True): 'use_ascend_direct', False):
hostname = self.side_channel_host hostname = self.side_channel_host
else: else:
hostname = f"{self.side_channel_host}:0:npu_{self.device_id}" hostname = f"{self.side_channel_host}:0:npu_{self.device_id}"