[Feat] Xlite Qwen3-VL Support (#5228)

### What this PR does / why we need it?

This patch adds support for the Qwen3-VL model in Xlite. For more details about Xlite, please refer to the following link: https://atomgit.com/openeuler/GVirt/blob/master/xlite/README.md

The latest performance comparison data between Xlite and the default aclgraph mode is as follows:

### Does this PR introduce _any_ user-facing change?

Xlite graph mode supports the Qwen3-VL model.

### How was this patch tested?

- vLLM version: v0.12.0
- vLLM version: release/v0.13.0
- vLLM main: ad32e3e19c

Signed-off-by: lvjunqi <lvjunqi1@huawei.com>
Co-authored-by: lvjunqi <lvjunqi1@huawei.com>
2025-12-22 16:30:52 +08:00
parent 78aa7f2693
commit 55beac9c91
4 changed files with 19 additions and 9 deletions
--- a/vllm_ascend/xlite/xlite.py
+++ b/vllm_ascend/xlite/xlite.py
@@ -48,16 +48,23 @@ class LlamaXliteModel(XliteModel):
            vllm_config: VllmConfig) -> Tuple[Model, int, int, torch.dtype]:
        dtype = vllm_config.model_config.dtype
        params_dict = dict(runnable.named_parameters())
-        layers = runnable.model.layers
+
+        if hasattr(runnable, "language_model"):
+            layers = runnable.language_model.model.layers
+            model_prefix = "language_model."
+        else:
+            layers = runnable.model.layers
+            model_prefix = ""

        config = self._build_model_config(vllm_config)
        xlite_model = Model()
-        xlite_model.embed = params_dict.get("model.embed_tokens.weight")
-        xlite_model.norm = params_dict.get("model.norm.weight")
+        xlite_model.embed = params_dict.get(model_prefix +
+                                            "model.embed_tokens.weight")
+        xlite_model.norm = params_dict.get(model_prefix + "model.norm.weight")
        if vllm_config.model_config.hf_config.tie_word_embeddings:
            xlite_model.head = xlite_model.embed
        else:
-            xlite_model.head = params_dict.get("lm_head.weight")
+            xlite_model.head = params_dict.get(model_prefix + "lm_head.weight")
        xlite_model.attn_norm = [
            layer.input_layernorm.weight for layer in layers
        ]
@@ -112,6 +119,8 @@ class LlamaXliteModel(XliteModel):

    def _build_model_config(self, vllm_config: VllmConfig) -> ModelConfig:
        hf_config = vllm_config.model_config.hf_config
+        if hasattr(hf_config, "text_config"):
+            hf_config = hf_config.text_config
        config = ModelConfig()
        config.vocab_size = hf_config.vocab_size
        config.hidden_size = hf_config.hidden_size
@@ -166,6 +175,7 @@ def xlite_model_init(
        "LlamaForCausalLM": LlamaXliteModel,
        "Qwen2ForCausalLM": LlamaXliteModel,
        "Qwen3ForCausalLM": LlamaXliteModel,
+        "Qwen3VLForConditionalGeneration": LlamaXliteModel,
    }

    architecture = vllm_config.model_config.architectures[0]