[qwen3 next ]add ascend c casual_conv1d_fn (#6661)
### What this PR does / why we need it?
add ascend c casual_conv1d_fn
- vLLM version: v0.15.0
- vLLM main:
13397841ab
---------
Signed-off-by: ZT-AIA <1028681969@qq.com>
Signed-off-by: ZT-AIA <63220130+ZT-AIA@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -92,10 +92,15 @@ def test_qwen3_next_mtp_acceptance_tp4(model_name):
|
||||
@pytest.mark.parametrize("model_name", MODELS)
|
||||
@pytest.mark.parametrize("num_speculative_tokens", [1])
|
||||
@pytest.mark.parametrize("disable_padded_drafter_batch", [True, False])
|
||||
@pytest.mark.skip("Skip this CI.")
|
||||
def test_qwen3_next_mtp_correctness_tp4(model_name: str,
|
||||
num_speculative_tokens: int,
|
||||
disable_padded_drafter_batch: bool):
|
||||
example_prompts = [
|
||||
"Hello, my name is",
|
||||
"The president of the United States is",
|
||||
"The capital of France is",
|
||||
"The future of AI is",
|
||||
"Hello, my name is",
|
||||
"The president of the United States is",
|
||||
"The capital of France is",
|
||||
|
||||
Reference in New Issue
Block a user