Sync from v0.13

2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions


@@ -1,25 +1,30 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Containing tests that check for regressions in vLLM's behavior.
 It should include tests that are reported by users and making sure they
 will never happen again.
 """
 import gc
 import pytest
 import torch
 from vllm import LLM, SamplingParams
 @pytest.mark.skip(reason="In V1, we reject tokens > max_seq_len")
 def test_duplicated_ignored_sequence_group():
     """https://github.com/vllm-project/vllm/issues/1655"""
-    sampling_params = SamplingParams(temperature=0.01,
-                                     top_p=0.1,
-                                     max_tokens=256)
-    llm = LLM(model="facebook/opt-125m",
-              max_num_batched_tokens=4096,
-              tensor_parallel_size=1)
+    sampling_params = SamplingParams(temperature=0.01, top_p=0.1, max_tokens=256)
+    llm = LLM(
+        model="distilbert/distilgpt2",
+        max_num_batched_tokens=4096,
+        tensor_parallel_size=1,
+    )
     prompts = ["This is a short prompt", "This is a very long prompt " * 1000]
     outputs = llm.generate(prompts, sampling_params=sampling_params)
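
A quick orientation on the API these hunks exercise: the change swaps the test model from facebook/opt-125m to distilbert/distilgpt2 and reformats the calls, but the LLM / SamplingParams / generate pattern is unchanged. A minimal standalone sketch (assuming vLLM is installed; the model name and parameters simply mirror the updated test, and RequestOutput.outputs[0].text is how a generated completion is read back):

    from vllm import LLM, SamplingParams

    # Same model and sampling setup as the updated regression test.
    llm = LLM(model="distilbert/distilgpt2", max_num_batched_tokens=4096)
    params = SamplingParams(temperature=0.01, top_p=0.1, max_tokens=256)

    outputs = llm.generate(["This is a short prompt"], sampling_params=params)
    for out in outputs:
        # Each RequestOutput pairs the prompt with its generated completions.
        print(out.prompt, "->", out.outputs[0].text)
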
@@ -27,12 +32,12 @@ def test_duplicated_ignored_sequence_group():
 def test_max_tokens_none():
-    sampling_params = SamplingParams(temperature=0.01,
-                                     top_p=0.1,
-                                     max_tokens=None)
-    llm = LLM(model="facebook/opt-125m",
-              max_num_batched_tokens=4096,
-              tensor_parallel_size=1)
+    sampling_params = SamplingParams(temperature=0.01, top_p=0.1, max_tokens=None)
+    llm = LLM(
+        model="distilbert/distilgpt2",
+        max_num_batched_tokens=4096,
+        tensor_parallel_size=1,
+    )
     prompts = ["Just say hello!"]
     outputs = llm.generate(prompts, sampling_params=sampling_params)
@@ -40,7 +45,7 @@ def test_max_tokens_none():
 def test_gc():
-    llm = LLM("facebook/opt-125m", enforce_eager=True)
+    llm = LLM(model="distilbert/distilgpt2", enforce_eager=True)
     del llm
     gc.collect()
@@ -53,6 +58,22 @@ def test_gc():
     assert allocated < 50 * 1024 * 1024
 if __name__ == "__main__":
     import pytest
     pytest.main([__file__])
+def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
+    # model: https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_MODELSCOPE", "True")
+        # Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
+        # with 400 Client Error: Bad Request.
+        m.setenv("HF_TOKEN", "")
+        llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")
+        prompts = [
+            "Hello, my name is",
+            "The president of the United States is",
+            "The capital of France is",
+            "The future of AI is",
+        ]
+        sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+        outputs = llm.generate(prompts, sampling_params)
+        assert len(outputs) == 4
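
The new test_model_from_modelscope drives the same API through the VLLM_USE_MODELSCOPE environment variable, which makes vLLM resolve the model name against ModelScope rather than the Hugging Face Hub. Outside of pytest's monkeypatch the toggle works the same way, provided it is set before the LLM is constructed. A minimal sketch under that assumption (model name and sampling settings taken from the test above):

    import os

    # Same environment toggles the test applies via monkeypatch.
    os.environ["VLLM_USE_MODELSCOPE"] = "True"
    os.environ["HF_TOKEN"] = ""  # avoid sending an HF token to a ModelScope repo

    from vllm import LLM, SamplingParams

    llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")
    outputs = llm.generate(
        ["The capital of France is"],
        SamplingParams(temperature=0.8, top_p=0.95),
    )
    print(outputs[0].outputs[0].text)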