vllm-ascend support chunked prefill (#1172)
### What this PR does / why we need it?

vllm-ascend supports chunked prefill for MLA.

---------

Signed-off-by: fems14 <1804143737@qq.com>
This commit is contained in:
```diff
@@ -39,6 +39,8 @@ class AscendConfig:
         self.expert_tensor_parallel_size = int(
             additional_config.get("expert_tensor_parallel_size", 0))
         self.expert_map_path = additional_config.get("expert_map_path", None)
+        self.chunked_prefill_for_mla = additional_config.get(
+            "chunked_prefill_for_mla", False)
 
 
 class TorchairGraphConfig:
```
Reference in New Issue
Block a user