Update to new version of base image

2025-10-24 15:45:06 +08:00
parent ee04aead1e
commit fad74b701b
476 changed files with 1270 additions and 46 deletions
--- a/vllm/model_executor/models/__pycache__/__init__.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/__init__.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/arctic.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/arctic.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/baichuan.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/baichuan.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/bart.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/bart.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/blip.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/blip.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/blip2.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/blip2.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/bloom.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/bloom.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/chameleon.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/chameleon.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/chatglm.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/chatglm.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/clip.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/clip.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/commandr.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/commandr.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/dbrx.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/dbrx.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/decilm.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/decilm.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/deepseek.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/deepseek.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/deepseek_v2.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/deepseek_v2.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/eagle.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/eagle.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/exaone.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/exaone.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/falcon.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/falcon.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/fuyu.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/fuyu.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/gemma.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/gemma.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/gemma2.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/gemma2.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/gemma2_embedding.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/gemma2_embedding.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/glm4.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/glm4.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/glm4_vision_encoder.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/glm4_vision_encoder.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/gpt2.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/gpt2.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/gpt_bigcode.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/gpt_bigcode.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/gpt_j.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/gpt_j.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/gpt_neox.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/gpt_neox.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/granite.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/granite.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/granitemoe.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/granitemoe.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/idefics2_vision_model.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/idefics2_vision_model.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/interfaces.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/interfaces.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/interfaces_base.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/interfaces_base.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/intern_vit.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/intern_vit.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/internlm2.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/internlm2.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/internvl.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/internvl.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/jais.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/jais.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/jamba.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/jamba.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/llama.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/llama.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/llama_embedding.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/llama_embedding.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/llava.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/llava.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/llava_next.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/llava_next.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/llava_next_video.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/llava_next_video.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/llava_onevision.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/llava_onevision.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/mamba.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/mamba.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/mamba_cache.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/mamba_cache.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/medusa.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/medusa.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/minicpm.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/minicpm.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/minicpm3.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/minicpm3.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/minicpmv.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/minicpmv.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/mixtral.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/mixtral.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/mixtral_quant.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/mixtral_quant.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/mllama.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/mllama.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/mlp_speculator.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/mlp_speculator.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/module_mapping.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/module_mapping.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/molmo.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/molmo.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/mpt.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/mpt.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/nemotron.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/nemotron.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/nvlm_d.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/nvlm_d.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/olmo.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/olmo.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/olmoe.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/olmoe.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/opt.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/opt.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/orion.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/orion.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/paligemma.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/paligemma.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/persimmon.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/persimmon.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/phi.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/phi.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/phi3.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/phi3.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/phi3_small.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/phi3_small.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/phi3v.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/phi3v.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/phimoe.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/phimoe.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/pixtral.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/pixtral.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/qwen.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/qwen.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/qwen2.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/qwen2.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/qwen2_5_vl.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/qwen2_5_vl.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/qwen2_moe.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/qwen2_moe.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/qwen2_rm.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/qwen2_rm.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/qwen2_vl.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/qwen2_vl.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/qwen3.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/qwen3.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/qwen3_moe.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/qwen3_moe.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/registry.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/registry.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/siglip.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/siglip.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/solar.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/solar.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/stablelm.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/stablelm.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/starcoder2.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/starcoder2.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/ultravox.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/ultravox.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/utils.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/utils.cpython-310.pyc
--- a/vllm/model_executor/models/__pycache__/xverse.cpython-310.pyc
+++ b/vllm/model_executor/models/__pycache__/xverse.cpython-310.pyc
--- a/vllm/model_executor/models/internvl.py
+++ b/vllm/model_executor/models/internvl.py
@@ -4,7 +4,7 @@
 # Copyright (c) 2023 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
-import re
+import regex as re
 from functools import cached_property, partial
 from typing import (Iterable, List, Literal, Mapping, Optional, Tuple,
                    TypedDict, Union)
--- a/vllm/model_executor/models/minicpmv.py
+++ b/vllm/model_executor/models/minicpmv.py
@@ -22,7 +22,7 @@
 # limitations under the License.
 """Inference-only MiniCPM-V model compatible with HuggingFace weights."""
 import math
-import re
+import regex as re
 from functools import partial
 from typing import (Any, Callable, Iterable, List, Literal, Mapping, Optional,
                    Tuple, TypedDict, Union)
--- a/vllm/model_executor/models/molmo.py
+++ b/vllm/model_executor/models/molmo.py
@@ -1,6 +1,6 @@
 import logging
 import math
-import re
+import regex as re
 from array import array
 from dataclasses import dataclass
 from functools import lru_cache, partial
--- a/vllm/model_executor/models/phi3v.py
+++ b/vllm/model_executor/models/phi3v.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import itertools
-import re
+import regex as re
 from functools import cached_property, lru_cache
 from typing import (Any, Dict, Iterable, List, Literal, Mapping, Optional,
                    Tuple, TypedDict, Union)
--- a/vllm/model_executor/models/qwen.py
+++ b/vllm/model_executor/models/qwen.py
@@ -6,7 +6,7 @@
 """Inference-only QWen model compatible with HuggingFace weights."""

 import math
-import re
+import regex as re
 from functools import partial
 from typing import (Any, Callable, Dict, Iterable, List, Literal, Mapping,
                    Optional, Tuple, TypedDict, Union)
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
--- a/vllm/model_executor/models/qwen3.py
+++ b/vllm/model_executor/models/qwen3.py
@@ -136,11 +136,11 @@ class Qwen3Attention(nn.Module):
        # Add qk-norm
        q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim,
                           self.head_dim)
-        q_by_head = self.q_norm.forward_native(q_by_head)
+        q_by_head = self.q_norm.forward_cuda(q_by_head.contiguous())
        q = q_by_head.view(q.shape)
        k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim,
                           self.head_dim)
-        k_by_head = self.k_norm.forward_native(k_by_head)
+        k_by_head = self.k_norm.forward_cuda(k_by_head.contiguous())
        k = k_by_head.view(k.shape)
        q, k = self.rotary_emb(positions, q, k)
        attn_output = self.attn(q, k, v, kv_cache, attn_metadata)
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -114,6 +114,7 @@ _MULTIMODAL_MODELS = {
    "PixtralForConditionalGeneration": ("pixtral", "PixtralForConditionalGeneration"),  # noqa: E501
    "QWenLMHeadModel": ("qwen", "QWenLMHeadModel"),
    "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),  # noqa: E501
+    "Qwen2_5_VLForConditionalGeneration": ("qwen2_5_vl", "Qwen2_5_VLForConditionalGeneration"),  # noqa: E501
    "UltravoxModel": ("ultravox", "UltravoxModel"),
    # [Encoder-decoder]
    "MllamaForConditionalGeneration": ("mllama", "MllamaForConditionalGeneration"),  # noqa: E501