[feat]decode convert bsnd to tnd and fix bug when pcp and dcp (#3980)
### What this PR does / why we need it?
1. In the attention_v1 module, convert bsnd to tnd when pcp and dcp are used.
2. Fix torchair bug: service startup problem.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
This commit is contained in:
@@ -63,10 +63,25 @@ class TestAscendAttentionBackend(TestBase):
|
||||
|
||||
class TestAscendAttentionMetadataBuilder(TestBase):
|
||||
|
||||
def setUp(self):
|
||||
@patch('vllm.distributed.parallel_state.get_dcp_group')
|
||||
@patch('vllm.distributed.parallel_state._DCP',
|
||||
new_callable=lambda: MagicMock(spec=GroupCoordinator))
|
||||
@patch("vllm.distributed.get_decode_context_model_parallel_world_size",
|
||||
return_value=1)
|
||||
def setUp(self, mock_get_dcp_size, mock_dcp, mock_get_dcp_group):
|
||||
mock_dcp.world_size = 1
|
||||
dcp_group = MagicMock(spec=GroupCoordinator)
|
||||
dcp_group.rank_in_group = 0
|
||||
dcp_group.world_size = 1
|
||||
dcp_group.device_group = MagicMock()
|
||||
mock_get_dcp_group.return_value = dcp_group
|
||||
|
||||
self.mock_vllm_config = MagicMock()
|
||||
self.mock_vllm_config.model_config.max_model_len = 640
|
||||
self.mock_vllm_config.cache_config.block_size = 64
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode = None
|
||||
self.mock_vllm_config.scheduler_config.max_num_seqs = 10
|
||||
self.mock_vllm_config.scheduler_config.decode_max_num_seqs = 10
|
||||
self.mock_device = 'cpu:0'
|
||||
self.builder = AscendAttentionMetadataBuilder(None, None,
|
||||
self.mock_vllm_config,
|
||||
|
||||
Reference in New Issue
Block a user