[bugfix_v0.11.0] Cancel tokenize for layerwise_proxy (#3913)
### What this PR does / why we need it?

Cancel tokenization for layerwise_proxy.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

By CI.

Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com>
This commit is contained in:
@@ -98,7 +98,6 @@ from typing import List
|
|||||||
import httpx
|
import httpx
|
||||||
from fastapi import FastAPI, Request
|
from fastapi import FastAPI, Request
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from transformers import AutoTokenizer
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
@@ -153,8 +152,6 @@ class ProxyState:
|
|||||||
heapq.heapify(self.decoder_heap)
|
heapq.heapify(self.decoder_heap)
|
||||||
self.req_id_future = {}
|
self.req_id_future = {}
|
||||||
self.req_data_dict = {}
|
self.req_data_dict = {}
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(
|
|
||||||
global_args.tokenizer_dir)
|
|
||||||
|
|
||||||
def _update_prefiller_priority(self, server_idx: int):
|
def _update_prefiller_priority(self, server_idx: int):
|
||||||
"""Update the priority of a prefiller server in the heap."""
|
"""Update the priority of a prefiller server in the heap."""
|
||||||
@@ -281,10 +278,6 @@ def parse_args():
|
|||||||
nargs="+",
|
nargs="+",
|
||||||
default=["localhost"])
|
default=["localhost"])
|
||||||
parser.add_argument("--decoder-ports", type=int, nargs="+", default=[8002])
|
parser.add_argument("--decoder-ports", type=int, nargs="+", default=[8002])
|
||||||
parser.add_argument("--tokenizer-dir",
|
|
||||||
type=str,
|
|
||||||
default="/mnt/weight/Qwen3-235B-A22B-W8A8",
|
|
||||||
help="Maximum number of retries for HTTP requests")
|
|
||||||
parser.add_argument("--max-retries",
|
parser.add_argument("--max-retries",
|
||||||
type=int,
|
type=int,
|
||||||
default=3,
|
default=3,
|
||||||
|
|||||||
@@ -656,7 +656,7 @@ class MooncakeLayerwiseConnectorWorker:
|
|||||||
self.device_id = device_ids[self.tp_rank] # type: ignore
|
self.device_id = device_ids[self.tp_rank] # type: ignore
|
||||||
|
|
||||||
if vllm_config.kv_transfer_config.get_from_extra_config(
|
if vllm_config.kv_transfer_config.get_from_extra_config(
|
||||||
'use_ascend_direct', True):
|
'use_ascend_direct', False):
|
||||||
hostname = self.side_channel_host
|
hostname = self.side_channel_host
|
||||||
else:
|
else:
|
||||||
hostname = f"{self.side_channel_host}:0:npu_{self.device_id}"
|
hostname = f"{self.side_channel_host}:0:npu_{self.device_id}"
|
||||||
|
|||||||
Reference in New Issue
Block a user