[CI] fix ci (#2464)
### What this PR does / why we need it?
1. use actions/checkout@v5 instead of v4
2. remove the dbo test case because it has an issue; it will be
refactored later
3. make vllm-ascend compatible with vllm v0.10.1.1 and add CI for it
4. fix sampler api changes introduced by
https://github.com/vllm-project/vllm/pull/22387
6. fix qwen3 moe config changes introduced by
https://github.com/vllm-project/vllm/pull/20562
7. fix kvcache block changes introduced by
https://github.com/vllm-project/vllm/pull/23262
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
CI passed with existing tests.
- vLLM version: v0.10.0
- vLLM main:
0c6e40bbaa
---------
Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -78,26 +78,6 @@ def test_models_distributed_DeepSeek_multistream_moe():
|
||||
vllm_model.generate_greedy(example_prompts, max_tokens)
|
||||
|
||||
|
||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_DBO": "1"})
|
||||
def test_models_distributed_DeepSeek_dbo():
|
||||
example_prompts = ["The president of the United States is"] * 41
|
||||
dtype = "half"
|
||||
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
|
||||
with VllmRunner(
|
||||
"deepseek-ai/DeepSeek-V2-Lite",
|
||||
dtype=dtype,
|
||||
tensor_parallel_size=2,
|
||||
distributed_executor_backend="mp",
|
||||
) as vllm_model:
|
||||
model_arch = 'DeepseekV2ForCausalLM'
|
||||
registed_models = ModelRegistry.models
|
||||
assert registed_models[
|
||||
model_arch].module_name == "vllm_ascend.models.deepseek_dbo"
|
||||
assert registed_models[
|
||||
model_arch].class_name == "CustomDeepseekDBOForCausalLM"
|
||||
vllm_model.generate(example_prompts, sampling_params)
|
||||
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason=
|
||||
"deepseek dbo dose not consider the support on half precision float, will enable this ut after we actually support it"
|
||||
|
||||
Reference in New Issue
Block a user