[MM][Bugfix] Update hf_config to hf_text_config (#5319)
### What this PR does / why we need it?
Following https://github.com/vllm-project/vllm-ascend/pull/5205, update
`hf_config` to `hf_text_config`.
Find more details at
https://github.com/vllm-project/vllm-ascend/pull/5205#issuecomment-3675417534
and
https://github.com/vllm-project/vllm-ascend/pull/5205#issuecomment-3677920872.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: release/v0.13.0
- vLLM main:
5fbfa8d9ef
Signed-off-by: shen-shanshan <467638484@qq.com>
This commit is contained in:
@@ -253,7 +253,7 @@ class NPUModelRunner(GPUModelRunner):
         self.is_multimodal_model = self.model_config.is_multimodal_model
         self.block_size = vllm_config.cache_config.block_size
         # Set up Attention
-        self.use_sparse = hasattr(self.vllm_config.model_config.hf_config,
+        self.use_sparse = hasattr(self.vllm_config.model_config.hf_text_config,
                                   "index_topk")
         self.attn_backend = get_attn_backend(
             0,
||||
@@ -2398,7 +2398,7 @@ class NPUModelRunner(GPUModelRunner):
                 kv_caches[layer_name] = kv_caches[target_layer_name]

         from vllm.v1.worker.utils import bind_kv_cache
-        num_attn_module = 2 if self.model_config.hf_config.model_type == "longcat_flash" else 1
+        num_attn_module = 2 if self.model_config.hf_text_config.model_type == "longcat_flash" else 1
         bind_kv_cache(kv_caches,
                       self.compilation_config.static_forward_context,
                       self.kv_caches, num_attn_module)
@@ -2932,7 +2932,7 @@ class NPUModelRunner(GPUModelRunner):
         mamba_layers = get_layers_from_vllm_config(self.vllm_config, MambaBase)
         if len(mamba_layers) > 0:
             if (self.vllm_config.speculative_config is not None
-                    and self.vllm_config.model_config.hf_config.model_type
+                    and self.vllm_config.model_config.hf_text_config.model_type
                     not in ["qwen3_next"]):
                 raise NotImplementedError(
                     "Mamba with speculative decoding is not supported yet.")

||||
@@ -173,7 +173,7 @@ class NPUWorker(WorkerBase):
             allocator = CaMemAllocator.get_instance()
             allocator.wake_up(tags=tags)

-        hidden_size = self.vllm_config.model_config.hf_config.hidden_size
+        hidden_size = self.vllm_config.model_config.hf_text_config.hidden_size
        model = self.model_runner.model
        if tags is None or "weights" in tags:
            for name, param in model.named_parameters():

Reference in New Issue
Block a user