[Core] Make V1 work and enable V1 engine test (#389)
1. Make sure the version is string before parse in collect_env 2. Add basic V1 engine test Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -100,3 +100,4 @@ def test_fused_experts(
|
||||
e_map)
|
||||
# TODO: The native params are: atol=2e-2, rtol=0, maybe related to the nan problem
|
||||
torch.testing.assert_close(output, torch_output, atol=4e-2, rtol=1)
|
||||
torch.npu.empty_cache()
|
||||
|
||||
@@ -45,8 +45,6 @@ def test_models(
|
||||
dtype: str,
|
||||
max_tokens: int,
|
||||
) -> None:
|
||||
os.environ["VLLM_ATTENTION_BACKEND"] = "ASCEND"
|
||||
|
||||
# 5042 tokens for gemma2
|
||||
# gemma2 has alternating sliding window size of 4096
|
||||
# we need a prompt with more than 4096 tokens to test the sliding window
|
||||
@@ -60,3 +58,8 @@ def test_models(
|
||||
enforce_eager=False,
|
||||
gpu_memory_utilization=0.7) as vllm_model:
|
||||
vllm_model.generate_greedy(example_prompts, max_tokens)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import pytest
|
||||
pytest.main([__file__])
|
||||
|
||||
Reference in New Issue
Block a user