[CI] Update guided decoding ut (#1312)
### What this PR does / why we need it? Update the guided decoding tests (UT) and re-enable them in the CI workflow. Signed-off-by: shen-shanshan <467638484@qq.com>
This commit is contained in:
6
.github/workflows/vllm_ascend_test.yaml
vendored
6
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -259,8 +259,7 @@ jobs:
|
|||||||
# TODO: switch hf to modelscope
|
# TODO: switch hf to modelscope
|
||||||
VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
|
VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
|
||||||
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
|
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
|
||||||
# TODO(sss): guided decoding doesn't work, fix it later
|
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
|
||||||
# pytest -sv tests/e2e/singlecard/test_guided_decoding.py
|
|
||||||
pytest -sv tests/e2e/singlecard/test_camem.py
|
pytest -sv tests/e2e/singlecard/test_camem.py
|
||||||
pytest -sv tests/e2e/singlecard/ \
|
pytest -sv tests/e2e/singlecard/ \
|
||||||
--ignore=tests/e2e/singlecard/test_offline_inference.py \
|
--ignore=tests/e2e/singlecard/test_offline_inference.py \
|
||||||
@@ -278,8 +277,7 @@ jobs:
|
|||||||
# TODO: switch hf to modelscope
|
# TODO: switch hf to modelscope
|
||||||
VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
|
VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
|
||||||
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
|
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
|
||||||
# guided decoding doesn't work, fix it later
|
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
|
||||||
# pytest -sv tests/e2e/singlecard/test_guided_decoding.py
|
|
||||||
pytest -sv tests/e2e/singlecard/test_camem.py
|
pytest -sv tests/e2e/singlecard/test_camem.py
|
||||||
pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
|
pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
|
||||||
pytest -sv tests/e2e/singlecard/ \
|
pytest -sv tests/e2e/singlecard/ \
|
||||||
|
|||||||
@@ -28,13 +28,10 @@ from vllm.sampling_params import GuidedDecodingParams, SamplingParams
|
|||||||
from tests.conftest import VllmRunner
|
from tests.conftest import VllmRunner
|
||||||
|
|
||||||
os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
|
os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
|
||||||
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
|
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
|
||||||
GuidedDecodingBackendV0 = [
|
|
||||||
"outlines",
|
GuidedDecodingBackendV0 = ["outlines", "lm-format-enforcer", "xgrammar"]
|
||||||
"lm-format-enforcer",
|
GuidedDecodingBackendV1 = ["xgrammar", "guidance"]
|
||||||
"xgrammar",
|
|
||||||
]
|
|
||||||
GuidedDecodingBackendV1 = ["xgrammar", "guidance:disable-any-whitespace"]
|
|
||||||
GuidedDecodingBackend = list(
|
GuidedDecodingBackend = list(
|
||||||
set(GuidedDecodingBackendV0 + GuidedDecodingBackendV1))
|
set(GuidedDecodingBackendV0 + GuidedDecodingBackendV1))
|
||||||
|
|
||||||
@@ -87,26 +84,25 @@ def sample_json_schema():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def check_backend(guided_decoding_backend: str):
|
||||||
|
if guided_decoding_backend not in GuidedDecodingBackendV0 and os.getenv(
|
||||||
|
"VLLM_USE_V1") == "0":
|
||||||
|
pytest.skip(f"{guided_decoding_backend} does not support v0, skip it.")
|
||||||
|
if guided_decoding_backend not in GuidedDecodingBackendV1 and os.getenv(
|
||||||
|
"VLLM_USE_V1") == "1":
|
||||||
|
pytest.skip(f"{guided_decoding_backend} does not support v1, skip it.")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("guided_decoding_backend", GuidedDecodingBackend)
|
@pytest.mark.parametrize("guided_decoding_backend", GuidedDecodingBackend)
|
||||||
def test_guided_json_completion(guided_decoding_backend: str,
|
def test_guided_json_completion(guided_decoding_backend: str,
|
||||||
sample_json_schema):
|
sample_json_schema):
|
||||||
if guided_decoding_backend == "xgrammar":
|
check_backend(guided_decoding_backend)
|
||||||
# xgrammar does not support json schema, will fall back to outlines, skip it
|
|
||||||
pytest.skip(
|
|
||||||
f"{guided_decoding_backend} will fall back to outlines, skip it")
|
|
||||||
if guided_decoding_backend not in GuidedDecodingBackendV0 and os.getenv(
|
|
||||||
"VLLM_USE_V1") == "0":
|
|
||||||
# guidance does not support on v0, skip it
|
|
||||||
pytest.skip(
|
|
||||||
f"{guided_decoding_backend} does not support on v0, skip it")
|
|
||||||
if guided_decoding_backend not in GuidedDecodingBackendV1 and os.getenv(
|
|
||||||
"VLLM_USE_V1") == "1":
|
|
||||||
pytest.skip(f"{guided_decoding_backend} does not support v1, skip it")
|
|
||||||
|
|
||||||
sampling_params = SamplingParams(
|
sampling_params = SamplingParams(
|
||||||
temperature=1.0,
|
temperature=1.0,
|
||||||
max_tokens=1000,
|
max_tokens=500,
|
||||||
guided_decoding=GuidedDecodingParams(json=sample_json_schema))
|
guided_decoding=GuidedDecodingParams(json=sample_json_schema))
|
||||||
|
|
||||||
with VllmRunner(
|
with VllmRunner(
|
||||||
MODEL_NAME,
|
MODEL_NAME,
|
||||||
seed=0,
|
seed=0,
|
||||||
@@ -138,19 +134,13 @@ def test_guided_json_completion(guided_decoding_backend: str,
|
|||||||
|
|
||||||
@pytest.mark.parametrize("guided_decoding_backend", GuidedDecodingBackend)
|
@pytest.mark.parametrize("guided_decoding_backend", GuidedDecodingBackend)
|
||||||
def test_guided_regex(guided_decoding_backend: str, sample_regex):
|
def test_guided_regex(guided_decoding_backend: str, sample_regex):
|
||||||
if guided_decoding_backend not in GuidedDecodingBackendV0 and os.getenv(
|
check_backend(guided_decoding_backend)
|
||||||
"VLLM_USE_V1") == "0":
|
|
||||||
# guidance does not support on v0, skip it
|
sampling_params = SamplingParams(
|
||||||
pytest.skip(
|
temperature=0.8,
|
||||||
f"{guided_decoding_backend} does not support on v0, skip it")
|
top_p=0.95,
|
||||||
if guided_decoding_backend not in GuidedDecodingBackendV1 and os.getenv(
|
guided_decoding=GuidedDecodingParams(regex=sample_regex))
|
||||||
"VLLM_USE_V1") == "1":
|
|
||||||
pytest.skip(f"{guided_decoding_backend} does not support v1, skip it")
|
|
||||||
|
|
||||||
sampling_params = SamplingParams(temperature=0.8,
|
|
||||||
top_p=0.95,
|
|
||||||
guided_decoding=GuidedDecodingParams(
|
|
||||||
regex=sample_regex, ))
|
|
||||||
with VllmRunner(
|
with VllmRunner(
|
||||||
MODEL_NAME,
|
MODEL_NAME,
|
||||||
seed=0,
|
seed=0,
|
||||||
|
|||||||
Reference in New Issue
Block a user