[long_seq] remove long_seq env (#4660)

### What this PR does / why we need it?
Remove the `VLLM_ASCEND_ENABLE_CONTEXT_PARALLEL` environment variable. The `prefill_context_parallel_enable()` gate is dropped from the platform check, so whether prefill context parallelism is active is now derived from the parallel configuration alone.
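For context, here is a minimal sketch of the pattern being removed: a process-wide env switch gating context parallelism. Only the helper name `prefill_context_parallel_enable` and the env var name come from this PR; the body below is an assumption about how such a gate is typically written, not the actual source.

```python
import os

def prefill_context_parallel_enable() -> bool:
    # Hypothetical pre-PR gate (assumed implementation): context
    # parallelism was switched on by an environment variable rather
    # than derived from the engine's parallel configuration.
    return os.getenv("VLLM_ASCEND_ENABLE_CONTEXT_PARALLEL", "0") == "1"
```

After this change, callers consult `parallel_config.prefill_context_parallel_size` directly, as the second hunk below shows.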

- vLLM version: v0.12.0

---------

Signed-off-by: LookAround <lixushi@huawei.com>
Signed-off-by: ZhangMingWei716 <2894054457@qq.com>
Co-authored-by: ZhangMingWei716 <2894054457@qq.com>
Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
Author: LookAround0301
Date: 2025-12-05 10:31:49 +08:00
Committed by: GitHub
Parent: ea54388e19
Commit: b32ef53b3b
16 changed files with 230 additions and 176 deletions

@@ -33,11 +33,12 @@ from vllm_ascend.torchair.utils import (check_torchair_cache_exist,
 from vllm_ascend.utils import refresh_block_size
 # isort: off
-from vllm_ascend.utils import (
-    ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD, AscendDeviceType,
-    enable_sp, get_ascend_device_type, is_vl_model,
-    prefill_context_parallel_enable, update_aclgraph_sizes,
-    update_cudagraph_capture_sizes, update_default_aclgraph_sizes)
+from vllm_ascend.utils import (ASCEND_QUANTIZATION_METHOD,
+                               COMPRESSED_TENSORS_METHOD, AscendDeviceType,
+                               enable_sp, get_ascend_device_type, is_vl_model,
+                               update_aclgraph_sizes,
+                               update_cudagraph_capture_sizes,
+                               update_default_aclgraph_sizes)
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
@@ -329,7 +330,6 @@ class NPUPlatform(Platform):
         vllm_config.scheduler_config.SLO_limits_for_dynamic_batch = ascend_config.SLO_limits_for_dynamic_batch
         if vllm_config.kv_transfer_config is not None and \
-                prefill_context_parallel_enable() and \
                 cache_config.block_size != parallel_config.cp_kv_cache_interleave_size and \
                 parallel_config.decode_context_parallel_size * parallel_config.prefill_context_parallel_size > 1:
             raise AssertionError(
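With the env gate gone, the validation fires whenever context parallelism is configured together with KV transfer. Below is a self-contained sketch of the resulting check, using stand-in config objects (the real classes live in vLLM's config module; the error message is assumed, since the hunk truncates it):

```python
from dataclasses import dataclass


@dataclass
class ParallelConfig:
    # Stand-ins for the fields referenced in the hunk above.
    cp_kv_cache_interleave_size: int
    decode_context_parallel_size: int = 1
    prefill_context_parallel_size: int = 1


def check_cp_kv_interleave(kv_transfer_config, block_size: int,
                           parallel_config: ParallelConfig) -> None:
    # Mirrors the post-PR condition: any configured context parallelism
    # (decode or prefill) combined with KV transfer requires the KV-cache
    # interleave size to match the cache block size.
    cp_world = (parallel_config.decode_context_parallel_size *
                parallel_config.prefill_context_parallel_size)
    if kv_transfer_config is not None and \
            block_size != parallel_config.cp_kv_cache_interleave_size and \
            cp_world > 1:
        raise AssertionError(
            "cp_kv_cache_interleave_size must equal block_size when "
            "context parallelism is used with KV transfer")  # assumed message
```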