forked from EngineX-Cambricon/enginex-mlu370-vllm
add qwen3
This commit is contained in:
19
vllm-v0.6.2/tests/tpu/test_custom_dispatcher.py
Normal file
19
vllm-v0.6.2/tests/tpu/test_custom_dispatcher.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import os
|
||||
|
||||
from vllm.compilation.levels import CompilationLevel
|
||||
|
||||
from ..utils import compare_two_settings
|
||||
|
||||
# On TPU, --enforce-eager still triggers graph compilation, which outlasts
# the default health-check timeout of the MQLLMEngine. Raise the RPC timeout
# to 30s (the value is given in milliseconds) before any server is started.
os.environ.update(VLLM_RPC_TIMEOUT="30000")
|
||||
|
||||
|
||||
def test_custom_dispatcher():
    """Compare two runs of google/gemma-2b that differ only in compile level.

    Both settings pass ``--enforce-eager``; the first sets
    ``VLLM_TORCH_COMPILE_LEVEL`` to ``CompilationLevel.DYNAMO_ONCE`` and the
    second to ``CompilationLevel.DYNAMO_AS_IS``. The actual comparison is
    delegated to ``compare_two_settings``.
    """
    level_var = "VLLM_TORCH_COMPILE_LEVEL"
    dynamo_once_env = {level_var: str(CompilationLevel.DYNAMO_ONCE)}
    dynamo_as_is_env = {level_var: str(CompilationLevel.DYNAMO_AS_IS)}

    compare_two_settings(
        "google/gemma-2b",
        arg1=["--enforce-eager"],
        arg2=["--enforce-eager"],
        env1=dynamo_once_env,
        env2=dynamo_as_is_env,
    )
|
||||
Reference in New Issue
Block a user