[MM][Bugfix] Update hf_config to hf_text_config (#5319)
### What this PR does / why we need it?
Following https://github.com/vllm-project/vllm-ascend/pull/5205, update
`hf_config` to `hf_text_config`.
Find more details at
https://github.com/vllm-project/vllm-ascend/pull/5205#issuecomment-3675417534
and
https://github.com/vllm-project/vllm-ascend/pull/5205#issuecomment-3677920872.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: release/v0.13.0
- vLLM main:
5fbfa8d9ef
Signed-off-by: shen-shanshan <467638484@qq.com>
This commit is contained in:
@@ -253,7 +253,7 @@ class NPUModelRunner(GPUModelRunner):
         self.is_multimodal_model = self.model_config.is_multimodal_model
         self.block_size = vllm_config.cache_config.block_size
         # Set up Attention
-        self.use_sparse = hasattr(self.vllm_config.model_config.hf_config,
+        self.use_sparse = hasattr(self.vllm_config.model_config.hf_text_config,
                                   "index_topk")
         self.attn_backend = get_attn_backend(
             0,
||||
@@ -2398,7 +2398,7 @@ class NPUModelRunner(GPUModelRunner):
                 kv_caches[layer_name] = kv_caches[target_layer_name]

         from vllm.v1.worker.utils import bind_kv_cache
-        num_attn_module = 2 if self.model_config.hf_config.model_type == "longcat_flash" else 1
+        num_attn_module = 2 if self.model_config.hf_text_config.model_type == "longcat_flash" else 1
         bind_kv_cache(kv_caches,
                       self.compilation_config.static_forward_context,
                       self.kv_caches, num_attn_module)
@@ -2932,7 +2932,7 @@ class NPUModelRunner(GPUModelRunner):
         mamba_layers = get_layers_from_vllm_config(self.vllm_config, MambaBase)
         if len(mamba_layers) > 0:
             if (self.vllm_config.speculative_config is not None
-                    and self.vllm_config.model_config.hf_config.model_type
+                    and self.vllm_config.model_config.hf_text_config.model_type
                     not in ["qwen3_next"]):
                 raise NotImplementedError(
                     "Mamba with speculative decoding is not supported yet.")

||||
@@ -173,7 +173,7 @@ class NPUWorker(WorkerBase):
             allocator = CaMemAllocator.get_instance()
             allocator.wake_up(tags=tags)

-        hidden_size = self.vllm_config.model_config.hf_config.hidden_size
+        hidden_size = self.vllm_config.model_config.hf_text_config.hidden_size
        model = self.model_runner.model
        if tags is None or "weights" in tags:
            for name, param in model.named_parameters():

Reference in New Issue
Block a user