Files
xc-llm-ascend/vllm_ascend/model_loader/rfork/seed_protocol.py
Marck 17da96658f [ModelLoader][Feature] Add rfork support for fast model loading (#7392)
### What this PR does / why we need it?
Support a new load format: RFORK

For implementation details of this feature, please refer to #7441


### Does this PR introduce _any_ user-facing change?

Add a new option for load-format: rfork

e.g.
```bash
vllm serve /workspace/models/Qwen3-8B --load-format rfork
```

### How was this patch tested?

- vLLM version: v0.17.0
- vLLM main:
4034c3d32e

Signed-off-by: Marck <1412354149@qq.com>
2026-03-25 16:40:30 +08:00

209 lines
7.4 KiB
Python

#
# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
from urllib.error import HTTPError
import requests
from vllm.logger import logger
from vllm.utils.network_utils import get_ip
# Timeout, in seconds, passed as `timeout=` to every scheduler HTTP request
# made by RForkSeedProtocol (via `_request_timeout_sec()`).
REQUEST_TIMEOUT_SEC = 10.0
# In `report_seed`, a successful heartbeat is logged only once every N
# iterations; failed heartbeats are logged every time.
HEARTBEAT_LOG_EVERY_N = 4
def get_local_seed_key(
    disaggregation_mode: str,
    node_rank: int,
    tp_rank: int,
    model_url: str,
    model_deploy_strategy_name: str,
    seed_key_separator: str = "$",
    is_draft_worker: bool = False,
) -> str:
    """Compose the seed key string that identifies this worker.

    The key is the following fields joined by ``seed_key_separator``, in order:
    ``model_url``, ``model_deploy_strategy_name``, ``disaggregation_mode``,
    ``node_rank``, ``tp_rank`` and, only when ``is_draft_worker`` is true, the
    literal ``draft``.

    Raises:
        RuntimeError: If ``model_url`` or ``model_deploy_strategy_name`` is
            empty / falsy.
    """
    if not (model_url and model_deploy_strategy_name):
        raise RuntimeError(
            "RFork seed key is not set. Ensure model_loader_extra_config contains "
            "`model_url` and `model_deploy_strategy_name`."
        )

    fields = [
        model_url,
        model_deploy_strategy_name,
        disaggregation_mode,
        node_rank,
        tp_rank,
    ]
    if is_draft_worker:
        fields.append("draft")

    # Each field is rendered through an f-string so it is formatted exactly as
    # it would be when interpolated directly into the key.
    joiner = f"{seed_key_separator}"
    return joiner.join(f"{field}" for field in fields)
class RForkSeedProtocol:
    """HTTP client for the RFork seed scheduler (called "planner" in log messages).

    Three scheduler endpoints are used, and every field is exchanged through
    HTTP headers rather than a request body:

    * ``GET  /get_seed``  - look up a seed for this worker's seed key.
    * ``POST /put_seed``  - hand a previously obtained seed back.
    * ``POST /add_seed``  - periodically announce this worker as a seed.

    ``get_seed`` and ``release_seed`` catch and log every ``Exception`` and
    signal failure through their return value instead of raising.
    """

    def __init__(
        self,
        *,
        disaggregation_mode: str,
        node_rank: int,
        tp_rank: int,
        scheduler_url: str,
        model_url: str,
        model_deploy_strategy_name: str,
        seed_key_separator: str = "$",
        is_draft_worker: bool = False,
    ):
        """Store the configuration and precompute the local seed key.

        Args:
            disaggregation_mode: Component of the seed key.
            node_rank: Component of the seed key.
            tp_rank: Component of the seed key; also sent as ``SEED_RANK``
                by ``report_seed``.
            scheduler_url: Base URL of the scheduler. It may be empty here;
                the request methods check it before use.
            model_url: Component of the seed key; must be non-empty.
            model_deploy_strategy_name: Component of the seed key; must be
                non-empty.
            seed_key_separator: String placed between seed key components.
            is_draft_worker: When true, ``draft`` is appended to the seed key.

        Raises:
            RuntimeError: Propagated from the module-level
                ``get_local_seed_key`` when ``model_url`` or
                ``model_deploy_strategy_name`` is empty.
        """
        self.disaggregation_mode = disaggregation_mode
        self.node_rank = node_rank
        self.tp_rank = tp_rank
        self.scheduler_url = scheduler_url
        self.model_url = model_url
        self.model_deploy_strategy_name = model_deploy_strategy_name
        self.seed_key_separator = seed_key_separator
        self.is_draft_worker = is_draft_worker
        # Inside a method body this name resolves to the module-level function,
        # not to the method of the same name defined below.
        self._local_seed_key = get_local_seed_key(
            disaggregation_mode=self.disaggregation_mode,
            node_rank=self.node_rank,
            tp_rank=self.tp_rank,
            model_url=self.model_url,
            model_deploy_strategy_name=self.model_deploy_strategy_name,
            seed_key_separator=self.seed_key_separator,
            is_draft_worker=self.is_draft_worker,
        )

    def get_local_seed_key(self) -> str:
        """Return the seed key computed once in ``__init__``."""
        return self._local_seed_key

    @staticmethod
    def _request_timeout_sec() -> float:
        """Return the timeout (seconds) applied to every scheduler request."""
        return REQUEST_TIMEOUT_SEC

    def _ensure_scheduler_url_set(self) -> None:
        """Raise ``RuntimeError`` if no scheduler URL was configured."""
        if not self.scheduler_url:
            raise RuntimeError("rfork_scheduler_url is not set. Cannot interact with the scheduler.")

    def _endpoint(self, path: str) -> str:
        """Return the absolute URL of the scheduler endpoint ``path``.

        Slashes at the junction are normalised so that a ``scheduler_url``
        configured with a trailing ``/`` does not produce ``//get_seed``.
        """
        return f"{self.scheduler_url.rstrip('/')}/{path.lstrip('/')}"

    def get_seed(self):
        """Ask the scheduler for a seed matching this worker's seed key.

        Returns:
            A dict with the keys ``seed_ip``, ``seed_port``, ``user_id`` and
            ``seed_rank`` read from the response headers (a value is ``None``
            when the scheduler did not send that header), or ``None`` when the
            scheduler URL is unset, the request failed, or the status code was
            not 200.
        """
        try:
            self._ensure_scheduler_url_set()
            response = requests.get(
                self._endpoint("get_seed"),
                headers={
                    "SEED_KEY": self.get_local_seed_key(),
                },
                timeout=self._request_timeout_sec(),
            )
            if response.status_code != 200:
                raise RuntimeError(f"Failed to get seed from the planner, {response.status_code}")
            # NOTE(review): missing headers are passed through as None rather
            # than treated as an error - confirm callers cope with that.
            seed_ip = response.headers.get("SEED_IP")
            seed_port = response.headers.get("SEED_PORT")
            user_id = response.headers.get("USER_ID")
            seed_rank = response.headers.get("SEED_RANK")
            logger.debug(
                "seed_ip: %s, seed_port: %s, user_id: %s, seed_rank: %s",
                seed_ip,
                seed_port,
                user_id,
                seed_rank,
            )
            return {
                "seed_ip": seed_ip,
                "seed_port": seed_port,
                "user_id": user_id,
                "seed_rank": seed_rank,
            }
        except RuntimeError as e:
            # Unset URL or non-200 answer: logged without a traceback.
            logger.warning("get_seed from planner RuntimeError: %s", e)
            return None
        except (HTTPError, requests.RequestException) as e:
            # `requests` reports transport failures as RequestException
            # subclasses; it never raises urllib's HTTPError, so catching only
            # that type left this branch unreachable.
            logger.exception("get_seed from planner HTTPError: %s", e)
            return None
        except Exception as e:
            logger.exception("get_seed from planner Exception: %s", e)
            return None

    def release_seed(self, seed) -> bool:
        """Return a seed to the scheduler via ``POST /put_seed``.

        Args:
            seed: Mapping with the keys ``user_id``, ``seed_ip``,
                ``seed_port`` and ``seed_rank`` (the shape produced by
                ``get_seed``). Port and rank are converted with ``str``.

        Returns:
            ``True`` if the scheduler answered 200, otherwise ``False`` (any
            exception, including a malformed ``seed``, is logged and reported
            as ``False``).
        """
        try:
            self._ensure_scheduler_url_set()
            user_id = seed["user_id"]
            seed_ip = seed["seed_ip"]
            seed_port = str(seed["seed_port"])
            seed_rank = str(seed["seed_rank"])
            response = requests.post(
                self._endpoint("put_seed"),
                headers={
                    "SEED_IP": seed_ip,
                    "SEED_PORT": seed_port,
                    "USER_ID": user_id,
                    "SEED_RANK": seed_rank,
                },
                timeout=self._request_timeout_sec(),
            )
            if response.status_code != 200:
                raise RuntimeError(f"Failed to release seed to the planner, {response.status_code}")
            return True
        except RuntimeError as e:
            logger.exception("release_seed to planner RuntimeError: %s", e)
            return False
        except (HTTPError, requests.RequestException) as e:
            # See get_seed: RequestException is what `requests` actually raises.
            logger.exception("release_seed to planner HTTPError: %s", e)
            return False
        except Exception as e:
            logger.exception("release_seed to planner Exception: %s", e)
            return False

    def report_seed(self, port: int, sleep_interval: int = 30):
        """Announce this worker to the scheduler in an endless heartbeat loop.

        Every ``sleep_interval`` seconds a ``POST /add_seed`` is sent carrying
        the seed key, the address from ``get_ip()``, ``port``, ``tp_rank`` and
        a reference count of ``0``. The loop has no exit condition, so this
        call blocks until the process ends; it returns early only when the
        one-time setup fails (for example, the scheduler URL is unset).

        Args:
            port: Port advertised to the scheduler as ``SEED_PORT``.
            sleep_interval: Seconds to sleep between heartbeats.
        """
        heartbeat_idx = 0
        log_every_n = HEARTBEAT_LOG_EVERY_N
        try:
            self._ensure_scheduler_url_set()
            seed_ip = get_ip()
            seed_key = self.get_local_seed_key()
        except Exception as e:
            logger.exception("report_seed setup Exception: %s", e)
            return

        while True:
            heartbeat_idx += 1
            result = False
            try:
                response = requests.post(
                    self._endpoint("add_seed"),
                    headers={
                        "SEED_KEY": seed_key,
                        "SEED_IP": seed_ip,
                        "SEED_PORT": str(port),
                        "SEED_RANK": str(self.tp_rank),
                        "SEED_REFCNT": str(0),
                    },
                    timeout=self._request_timeout_sec(),
                )
                if response.status_code == 200:
                    result = True
                else:
                    # Without this the rejection is only visible as
                    # "result: False", with no hint of the cause.
                    logger.warning(
                        "report_seed to planner got unexpected status code: %s",
                        response.status_code,
                    )
            except (HTTPError, requests.RequestException) as e:
                # See get_seed: RequestException is what `requests` actually raises.
                logger.exception("report_seed to planner HTTPError: %s", e)
            except Exception as e:
                logger.exception("report_seed to planner Exception: %s", e)

            # Keep heartbeat frequency unchanged, but reduce log noise.
            # Always print failures immediately; print success once every N times.
            if (not result) or (heartbeat_idx % log_every_n == 0):
                # Position inside the current window of N heartbeats, 1..N.
                position_in_window = (heartbeat_idx % log_every_n) or log_every_n
                logger.info(
                    "[rfork_heartbeat] report seed to planner result: %s (%d/%d)",
                    result,
                    position_in_window,
                    log_every_n,
                )
            time.sleep(sleep_interval)