[CI] recover e2e test (#2688)
1. recover the skipped test.
2. remove pangu eager mode test; it's already tested by torchair mode.
3. skip pangu test until the bug is fixed.
- vLLM version: v0.10.1.1
- vLLM main:
56d04089ef
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -4,7 +4,6 @@ import pytest
|
||||
|
||||
from tests.e2e.conftest import VllmRunner
|
||||
from tests.e2e.model_utils import check_outputs_equal
|
||||
from vllm_ascend.ascend_config import clear_ascend_config
|
||||
|
||||
MODEL = "Qwen/Qwen3-0.6B"
|
||||
|
||||
@@ -27,8 +26,6 @@ def test_concurrent_partial_prefill():
|
||||
for output in outputs:
|
||||
assert len(output.outputs) == 1
|
||||
|
||||
clear_ascend_config()
|
||||
|
||||
|
||||
def test_prefix_cache_stats_is_recorded():
|
||||
with VllmRunner(MODEL,
|
||||
@@ -48,8 +45,6 @@ def test_prefix_cache_stats_is_recorded():
|
||||
outputs = vllm_model.model.generate([input_tokens])
|
||||
assert outputs[0].num_cached_tokens == 128
|
||||
|
||||
clear_ascend_config()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("max_tokens",
|
||||
[4]) # cannot align results when max_tokens > 4
|
||||
@@ -91,4 +86,3 @@ def test_chunked_prefill_with_ascend_scheduler(
|
||||
name_0="vllm_output",
|
||||
name_1="chunked_prefill_output",
|
||||
)
|
||||
clear_ascend_config()
|
||||
|
||||
Reference in New Issue
Block a user