[2/N][Pangu][MoE] Remove Pangu Related Code (#5130)
### What this PR does / why we need it?
Remove Pangu-related code.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
End-to-end (e2e) and unit tests.
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: weichen <calvin_zhu0210@outlook.com>
This commit is contained in:
@@ -2504,14 +2504,8 @@ class NPUModelRunner(GPUModelRunner):
|
||||
# the min of all `num_blocks`. Verify it here.
|
||||
assert num_blocks >= kv_cache_config.num_blocks
|
||||
|
||||
if self.vllm_config.additional_config.get(
|
||||
"kv_cache_dtype", None) == 'int8':
|
||||
kv_cache_shape = attn_backend.get_bsh_kv_cache_shape(
|
||||
num_blocks, kv_cache_spec.block_size,
|
||||
kv_cache_spec.num_kv_heads,
|
||||
kv_cache_spec.head_size)
|
||||
elif hasattr(attn_backend, "get_supported_block_size"
|
||||
) and self.use_hybrid_blocks:
|
||||
if hasattr(attn_backend, "get_supported_block_size"
|
||||
) and self.use_hybrid_blocks:
|
||||
block_size = attn_backend.get_supported_block_size()[0]
|
||||
|
||||
block_size_chunk = kv_cache_spec.block_size // block_size
|
||||
|
||||
Reference in New Issue
Block a user