[Bugfix] fix ZeroDivisionError when prefill_tp_size > num_kv_head and fix tp_resharding README (#3437)
### What this PR does / why we need it? Fix a ZeroDivisionError when prefill_tp_size > num_kv_head. In this situation, num_head_replica can be 0 and is then used as a divisor; this PR restricts the minimum value of num_head_replica to 1. This PR also fixes the tp_resharding README. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By CI. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: liziyu <liziyu16@huawei.com> Signed-off-by: nwpu-zxr <zhouxuerong2@huawei.com> Co-authored-by: liziyu <liziyu16@huawei.com>
This commit is contained in:
@@ -79,7 +79,7 @@ class TestKVCacheSendingLayerThreadBasic(unittest.TestCase):
|
||||
self.p1 = patch(
|
||||
'vllm_ascend.distributed.mooncake_layerwise_connector.get_ascend_config',
|
||||
new=MagicMock(return_value=SimpleNamespace(
|
||||
pd_tp_ratio=1, num_head_replica=0, pd_head_ratio=1)))
|
||||
pd_tp_ratio=1, num_head_replica=1, pd_head_ratio=1)))
|
||||
self.p2 = patch(
|
||||
'vllm_ascend.distributed.mooncake_layerwise_connector.get_current_vllm_config',
|
||||
new=MagicMock(return_value=SimpleNamespace(
|
||||
@@ -244,7 +244,7 @@ class TestSendingLayerThread(unittest.TestCase):
|
||||
self.p1 = patch(
|
||||
'vllm_ascend.distributed.mooncake_layerwise_connector.get_ascend_config',
|
||||
new=MagicMock(return_value=SimpleNamespace(
|
||||
pd_tp_ratio=1, num_head_replica=0, pd_head_ratio=1)))
|
||||
pd_tp_ratio=1, num_head_replica=1, pd_head_ratio=1)))
|
||||
self.p2 = patch(
|
||||
'vllm_ascend.distributed.mooncake_layerwise_connector.get_current_vllm_config',
|
||||
new=MagicMock(return_value=SimpleNamespace(
|
||||
@@ -903,7 +903,7 @@ class TestMooncakeLayerwiseConnectorWorker(unittest.TestCase):
|
||||
patch(
|
||||
'vllm_ascend.distributed.mooncake_layerwise_connector.get_ascend_config',
|
||||
return_value=SimpleNamespace(pd_tp_ratio=1,
|
||||
num_head_replica=0,
|
||||
num_head_replica=1,
|
||||
pd_head_ratio=1),
|
||||
),
|
||||
patch(
|
||||
|
||||
Reference in New Issue
Block a user