[CI/UT] Add tests for chunked prefill and prefix cache on v1/AscendScheduler (#1505)
### What this PR does / why we need it? Adds tests for chunked prefill and prefix cache on v1/AscendScheduler. Covered scenarios: - `Qwen/Qwen3-0.6B-Base` and `deepseek-ai/DeepSeek-V2-Lite-Chat` --- multicard CI time increased by 19 min - `V1 + default scheduler` vs `V1 + default scheduler + enable prefix cache` - `V1 + Ascend scheduler` vs `V1 + Ascend scheduler + enable prefix cache` vs `V1 + Ascend scheduler + enable prefix cache + enable chunked prefill` - `Qwen/Qwen3-0.6B-Base` --- singlecard CI time increased by 8 min - `V1 + Ascend scheduler` vs `V1 + Ascend scheduler + enable chunked prefill`. This PR should be rebased after #1498 and #1446. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? CI passed with the newly added tests. Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
|
||||
import contextlib
|
||||
import gc
|
||||
import os
|
||||
from typing import Any, List, Optional, Tuple, TypeVar, Union
|
||||
|
||||
import numpy as np
|
||||
@@ -59,6 +60,9 @@ PromptImageInput = _PromptMultiModalInput[Image.Image]
|
||||
PromptAudioInput = _PromptMultiModalInput[Tuple[np.ndarray, int]]
|
||||
PromptVideoInput = _PromptMultiModalInput[np.ndarray]
|
||||
|
||||
_TEST_DIR = os.path.dirname(__file__)
|
||||
_TEST_PROMPTS = [os.path.join(_TEST_DIR, "e2e", "prompts", "example.txt")]
|
||||
|
||||
|
||||
def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
|
||||
destroy_model_parallel()
|
||||
@@ -367,6 +371,20 @@ def prompt_template(request):
|
||||
return PROMPT_TEMPLATES[request.param]
|
||||
|
||||
|
||||
def _read_prompts(filename: str) -> list[str]:
|
||||
with open(filename) as f:
|
||||
prompts = f.readlines()
|
||||
return prompts
|
||||
|
||||
|
||||
@pytest.fixture
def example_prompts() -> list[str]:
    """Return the lines of every file in ``_TEST_PROMPTS`` as one flat list.

    Files are read in the order they appear in ``_TEST_PROMPTS`` and each
    file's lines keep the order returned by ``_read_prompts``.
    """
    return [
        line
        for prompt_file in _TEST_PROMPTS
        for line in _read_prompts(prompt_file)
    ]
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def ilama_lora_files():
    """Session-scoped fixture for the ``jeeejeee/ilama-text2sql-spider`` repo.

    Calls ``snapshot_download`` for that repo id and hands its return value
    to the tests (presumably a local path to the downloaded snapshot --
    confirm against the ``snapshot_download`` in use).
    """
    lora_snapshot = snapshot_download(repo_id="jeeejeee/ilama-text2sql-spider")
    return lora_snapshot
|
||||
|
||||
Reference in New Issue
Block a user