Upgrade to vllm 0.17.0 corex v4.1 overlay

This commit is contained in:
2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions

View File

@@ -7,6 +7,7 @@ from typing import Any
import gguf
import torch
import torch.nn.functional as F
from gguf import GGMLQuantizationType as WeightType
from torch.nn.parameter import Parameter, UninitializedParameter
@@ -234,7 +235,7 @@ try:
op_func=_fused_mul_mat_gguf,
fake_impl=_fused_mul_mat_gguf_fake,
)
fused_mul_mat_gguf = torch.ops.vllm._fused_mul_mat_gguf
fused_mul_mat_gguf = _fused_mul_mat_gguf
except AttributeError as error:
raise error
@@ -365,7 +366,7 @@ try:
op_func=_fused_moe_gguf,
fake_impl=_fused_moe_gguf_fake,
)
fused_moe_gguf = torch.ops.vllm._fused_moe_gguf
fused_moe_gguf = _fused_moe_gguf
except AttributeError as error:
raise error
@@ -410,7 +411,7 @@ try:
op_func=_apply_gguf_embedding,
fake_impl=_apply_gguf_embedding_fake,
)
apply_gguf_embedding = torch.ops.vllm._apply_gguf_embedding
apply_gguf_embedding = _apply_gguf_embedding
except AttributeError as error:
raise error
@@ -451,6 +452,9 @@ class GGUFLinearMethod(LinearMethodBase):
"data_container": [],
"shard_id": [],
"shard_id_map": {},
"params_dtype": params_dtype,
"input_size_per_partition" :input_size_per_partition, # restore shape for qkv and merge
"output_partition_sizes" :output_partition_sizes,
},
)
set_weight_attrs(qweight, extra_weight_attrs)
@@ -664,6 +668,10 @@ class GGUFEmbeddingMethod(GGUFLinearMethod):
"""
def embedding(self, layer: torch.nn.Module, x: torch.Tensor) -> torch.Tensor:
    """Look up embedding vectors for the indices in ``x``.

    Args:
        layer: Module whose ``weight`` attribute is used as the embedding
            table.
        x: Index tensor passed to ``torch.nn.functional.embedding``.

    Returns:
        The result of ``F.embedding(x, layer.weight)``.
    """
    # NOTE(review): presumably ``layer.weight`` is already in a directly
    # indexable form by the time this runs -- confirm against the loader.
    return F.embedding(x, layer.weight)
def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
qweight = layer.qweight
qweight_type = layer.qweight_type.weight_type
hidden_size = qweight.tensor_shape[1]