diff --git a/.github/workflows/misc/model_list.json b/.github/workflows/misc/model_list.json index 1bfd8fce..562c8601 100644 --- a/.github/workflows/misc/model_list.json +++ b/.github/workflows/misc/model_list.json @@ -222,6 +222,7 @@ "vllm-ascend/Qwen3-Next-80B-A3B-Instruct-W8A8-Pruning", "vllm-ascend/Qwen3-Omni-30B-A3B-Thinking", "vllm-ascend/Qwen3-VL-8B-Instruct", + "vllm-ascend/Qwen3-VL-8B-Instruct-W8A8", "vllm-ascend/TinyLlama-1.1B-Chat-v0.3", "vllm-ascend/benchmark", "vllm-ascend/ilama-3.2-1B", diff --git a/.github/workflows/schedule_nightly_test_a2.yaml b/.github/workflows/schedule_nightly_test_a2.yaml index d15b3f23..f79d9496 100644 --- a/.github/workflows/schedule_nightly_test_a2.yaml +++ b/.github/workflows/schedule_nightly_test_a2.yaml @@ -250,6 +250,7 @@ jobs: - name: accuracy-group-1 os: linux-aarch64-a2b3-1 model_list: + - Qwen3-VL-8B-Instruct-W8A8 - Qwen3-8B - Qwen2-Audio-7B-Instruct - Qwen3-8B-W8A8 diff --git a/tests/e2e/models/configs/Qwen3-VL-8B-Instruct-W8A8.yaml b/tests/e2e/models/configs/Qwen3-VL-8B-Instruct-W8A8.yaml new file mode 100644 index 00000000..9535bd33 --- /dev/null +++ b/tests/e2e/models/configs/Qwen3-VL-8B-Instruct-W8A8.yaml @@ -0,0 +1,12 @@ +model_name: "vllm-ascend/Qwen3-VL-8B-Instruct-W8A8" +hardware: "Atlas A2 Series" +model: "vllm-vlm" +tasks: +- name: "mmmu_val" + metrics: + - name: "acc,none" + value: 0.52 +max_model_len: 8192 +batch_size: 32 +gpu_memory_utilization: 0.8 +quantization: ascend diff --git a/vllm_ascend/quantization/modelslim_config.py b/vllm_ascend/quantization/modelslim_config.py index 82b3d279..e5361dd1 100644 --- a/vllm_ascend/quantization/modelslim_config.py +++ b/vllm_ascend/quantization/modelslim_config.py @@ -57,7 +57,7 @@ QUANT_MODEL_PREFIX_MAPPINGS: dict[str, dict[str, str]] = { "language_model.lm_head.": "lm_head.", "language_model.model.": "model.language_model.", }, - "qwen3_vl_text": { + "qwen3_vl": { "visual.": "model.visual.", "language_model.lm_head.": "lm_head.", 
"language_model.model.": "model.language_model.",