KVCache Transfer via Layer-wise Strategy in Disaggregation (#2602)
### What this PR does / why we need it? See RFC: https://github.com/vllm-project/vllm-ascend/issues/2470 This PR adds a new KV connector for layer-wise KV transfer. ### Does this PR introduce _any_ user-facing change? Yes, a new KV connector is added. Users can now use the layer-wise feature. ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/releases/v0.11.0 --------- Signed-off-by: leichao.lc <leichao139636@163.com> Signed-off-by: CaveNightingale <2859066733@qq.com> Signed-off-by: nwpu-zxr <zhouxuerong2@huawei.com> Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com> Signed-off-by: hanxinlong <50882499@qq.com> Signed-off-by: liziyu <liziyu16@huawei.com> Co-authored-by: CaveNightingale <2859066733@qq.com> Co-authored-by: nwpu-zxr <zhouxuerong2@huawei.com> Co-authored-by: wangxiaoteng <wangxiaoteng@huawei.com> Co-authored-by: hanxinlong <50882499@qq.com>
This commit is contained in:
@@ -94,6 +94,17 @@ class AscendConfig:
|
||||
raise AssertionError(
|
||||
"oproj_tensor_parallel_size is only supported in pd scenario and can only be used in D node."
|
||||
)
|
||||
self.pd_tp_ratio = 1
|
||||
if vllm_config.kv_transfer_config is not None and not vllm_config.model_config.is_deepseek_mla:
|
||||
prefill_tp_size = vllm_config.kv_transfer_config.get_from_extra_config(
|
||||
"prefill", {"tp_size": 1})["tp_size"]
|
||||
decode_tp_size = vllm_config.kv_transfer_config.get_from_extra_config(
|
||||
"decode", {"tp_size": 1})["tp_size"]
|
||||
pd_tp_ratio: int = prefill_tp_size // decode_tp_size
|
||||
self.pd_tp_ratio = pd_tp_ratio
|
||||
if self.pd_tp_ratio == 0:
|
||||
raise AssertionError(
|
||||
"Only support P node tp size lagger then D node tp size")
|
||||
|
||||
|
||||
class TorchairGraphConfig:
|
||||
|
||||
Reference in New Issue
Block a user