### What this PR does / why we need it?
Support an new load format: RFORK
For implementation details of this feature, please refer to #7441
### Does this PR introduce _any_ user-facing change?
add an new options for load-format: rfork
e.g.
```bash
vllm serve /workspace/models/Qwen3-8B --load-format rfork
```
### How was this patch tested?
- vLLM version: v0.17.0
- vLLM main:
4034c3d32e
Signed-off-by: Marck <1412354149@qq.com>
209 lines
7.4 KiB
Python
209 lines
7.4 KiB
Python
#
|
|
# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
import time
|
|
from urllib.error import HTTPError
|
|
|
|
import requests
|
|
from vllm.logger import logger
|
|
from vllm.utils.network_utils import get_ip
|
|
|
|
REQUEST_TIMEOUT_SEC = 10.0
|
|
HEARTBEAT_LOG_EVERY_N = 4
|
|
|
|
|
|
def get_local_seed_key(
|
|
disaggregation_mode: str,
|
|
node_rank: int,
|
|
tp_rank: int,
|
|
model_url: str,
|
|
model_deploy_strategy_name: str,
|
|
seed_key_separator: str = "$",
|
|
is_draft_worker: bool = False,
|
|
) -> str:
|
|
if not model_url or not model_deploy_strategy_name:
|
|
raise RuntimeError(
|
|
"RFork seed key is not set. Ensure model_loader_extra_config contains "
|
|
"`model_url` and `model_deploy_strategy_name`."
|
|
)
|
|
|
|
seed_key = f"{model_url}{seed_key_separator}{model_deploy_strategy_name}"
|
|
key_suffix = f"{disaggregation_mode}{seed_key_separator}{node_rank}{seed_key_separator}{tp_rank}"
|
|
if is_draft_worker:
|
|
key_suffix += f"{seed_key_separator}draft"
|
|
return f"{seed_key}{seed_key_separator}{key_suffix}"
|
|
|
|
|
|
class RForkSeedProtocol:
|
|
def __init__(
|
|
self,
|
|
*,
|
|
disaggregation_mode: str,
|
|
node_rank: int,
|
|
tp_rank: int,
|
|
scheduler_url: str,
|
|
model_url: str,
|
|
model_deploy_strategy_name: str,
|
|
seed_key_separator: str = "$",
|
|
is_draft_worker: bool = False,
|
|
):
|
|
self.disaggregation_mode = disaggregation_mode
|
|
self.node_rank = node_rank
|
|
self.tp_rank = tp_rank
|
|
self.scheduler_url = scheduler_url
|
|
self.model_url = model_url
|
|
self.model_deploy_strategy_name = model_deploy_strategy_name
|
|
self.seed_key_separator = seed_key_separator
|
|
self.is_draft_worker = is_draft_worker
|
|
|
|
self._local_seed_key = get_local_seed_key(
|
|
disaggregation_mode=self.disaggregation_mode,
|
|
node_rank=self.node_rank,
|
|
tp_rank=self.tp_rank,
|
|
model_url=self.model_url,
|
|
model_deploy_strategy_name=self.model_deploy_strategy_name,
|
|
seed_key_separator=self.seed_key_separator,
|
|
is_draft_worker=self.is_draft_worker,
|
|
)
|
|
|
|
def get_local_seed_key(self) -> str:
|
|
return self._local_seed_key
|
|
|
|
@staticmethod
|
|
def _request_timeout_sec() -> float:
|
|
return REQUEST_TIMEOUT_SEC
|
|
|
|
def _ensure_scheduler_url_set(self) -> None:
|
|
if not self.scheduler_url:
|
|
raise RuntimeError("rfork_scheduler_url is not set. Cannot interact with the scheduler.")
|
|
|
|
def get_seed(self):
|
|
try:
|
|
self._ensure_scheduler_url_set()
|
|
response = requests.get(
|
|
f"{self.scheduler_url}/get_seed",
|
|
headers={
|
|
"SEED_KEY": self.get_local_seed_key(),
|
|
},
|
|
timeout=self._request_timeout_sec(),
|
|
)
|
|
if response.status_code != 200:
|
|
raise RuntimeError(f"Failed to get seed from the planner, {response.status_code}")
|
|
|
|
seed_ip = response.headers.get("SEED_IP")
|
|
seed_port = response.headers.get("SEED_PORT")
|
|
user_id = response.headers.get("USER_ID")
|
|
seed_rank = response.headers.get("SEED_RANK")
|
|
logger.debug(
|
|
"seed_ip: %s, seed_port: %s, user_id: %s, seed_rank: %s",
|
|
seed_ip,
|
|
seed_port,
|
|
user_id,
|
|
seed_rank,
|
|
)
|
|
return {
|
|
"seed_ip": seed_ip,
|
|
"seed_port": seed_port,
|
|
"user_id": user_id,
|
|
"seed_rank": seed_rank,
|
|
}
|
|
|
|
except RuntimeError as e:
|
|
logger.warning("get_seed from planner RuntimeError: %s", e)
|
|
return None
|
|
except HTTPError as e:
|
|
logger.exception("get_seed from planner HTTPError: %s", e)
|
|
return None
|
|
except Exception as e:
|
|
logger.exception("get_seed from planner Exception: %s", e)
|
|
return None
|
|
|
|
def release_seed(self, seed) -> bool:
|
|
try:
|
|
self._ensure_scheduler_url_set()
|
|
user_id = seed["user_id"]
|
|
seed_ip = seed["seed_ip"]
|
|
seed_port = str(seed["seed_port"])
|
|
seed_rank = str(seed["seed_rank"])
|
|
|
|
response = requests.post(
|
|
f"{self.scheduler_url}/put_seed",
|
|
headers={
|
|
"SEED_IP": seed_ip,
|
|
"SEED_PORT": seed_port,
|
|
"USER_ID": user_id,
|
|
"SEED_RANK": seed_rank,
|
|
},
|
|
timeout=self._request_timeout_sec(),
|
|
)
|
|
|
|
if response.status_code != 200:
|
|
raise RuntimeError(f"Failed to release seed to the planner, {response.status_code}")
|
|
return True
|
|
except RuntimeError as e:
|
|
logger.exception("release_seed to planner RuntimeError: %s", e)
|
|
return False
|
|
except HTTPError as e:
|
|
logger.exception("release_seed to planner HTTPError: %s", e)
|
|
return False
|
|
except Exception as e:
|
|
logger.exception("release_seed to planner Exception: %s", e)
|
|
return False
|
|
|
|
def report_seed(self, port: int, sleep_interval: int = 30):
|
|
heartbeat_idx = 0
|
|
log_every_n = HEARTBEAT_LOG_EVERY_N
|
|
try:
|
|
self._ensure_scheduler_url_set()
|
|
seed_ip = get_ip()
|
|
seed_key = self.get_local_seed_key()
|
|
except Exception as e:
|
|
logger.exception("report_seed setup Exception: %s", e)
|
|
return
|
|
|
|
while True:
|
|
heartbeat_idx += 1
|
|
result = False
|
|
try:
|
|
response = requests.post(
|
|
f"{self.scheduler_url}/add_seed",
|
|
headers={
|
|
"SEED_KEY": seed_key,
|
|
"SEED_IP": seed_ip,
|
|
"SEED_PORT": str(port),
|
|
"SEED_RANK": str(self.tp_rank),
|
|
"SEED_REFCNT": str(0),
|
|
},
|
|
timeout=self._request_timeout_sec(),
|
|
)
|
|
if response.status_code == 200:
|
|
result = True
|
|
except HTTPError as e:
|
|
logger.exception("report_seed to planner HTTPError: %s", e)
|
|
except Exception as e:
|
|
logger.exception("report_seed to planner Exception: %s", e)
|
|
|
|
# Keep heartbeat frequency unchanged, but reduce log noise.
|
|
# Always print failures immediately; print success once every N times.
|
|
if (not result) or (heartbeat_idx % log_every_n == 0):
|
|
logger.info(
|
|
"[rfork_heartbeat] report seed to planner result: %s (%d/%d)",
|
|
result,
|
|
heartbeat_idx % log_every_n if heartbeat_idx % log_every_n != 0 else log_every_n,
|
|
log_every_n,
|
|
)
|
|
time.sleep(sleep_interval)
|