[Feature] Add PD separation feature (#432)
### What this PR does / why we need it? Adapt Disaggregated Prefill feature onto Ascend device ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? The test usage has been provided along with the PR, in examples/offline_disaggregated_prefill_npu.py To run it, do this ``` export PROMPT_DEVICE_ID=0,1 export DECODE_DEVICE_ID=2,3 python examples/offline_disaggregated_prefill_npu.py ``` --------- Signed-off-by: ZihuiQian <qianzihui@huawei.com> Co-authored-by: ZihuiQian <qianzihui@huawei.com>
This commit is contained in:
@@ -25,7 +25,8 @@ import torch.nn as nn
|
||||
import torch_npu
|
||||
from vllm import envs
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.distributed import (ensure_model_parallel_initialized,
|
||||
from vllm.distributed import (ensure_kv_transfer_initialized,
|
||||
ensure_model_parallel_initialized,
|
||||
init_distributed_environment,
|
||||
set_custom_all_reduce)
|
||||
from vllm.logger import logger
|
||||
@@ -197,6 +198,7 @@ class NPUWorker(WorkerBase):
|
||||
ensure_model_parallel_initialized(
|
||||
self.parallel_config.tensor_parallel_size,
|
||||
self.parallel_config.pipeline_parallel_size)
|
||||
ensure_kv_transfer_initialized(self.vllm_config)
|
||||
|
||||
def _init_profiler(self):
|
||||
# Torch profiler. Enabled and configured through env vars:
|
||||
@@ -230,4 +232,4 @@ class NPUWorker(WorkerBase):
|
||||
on_trace_ready=torch_npu.profiler.tensorboard_trace_handler(
|
||||
torch_profiler_trace_dir))
|
||||
else:
|
||||
return None
|
||||
return None
|
||||
Reference in New Issue
Block a user