Upgrade to vllm 0.17.0 corex v4.1 overlay
This commit is contained in:
@@ -7,6 +7,7 @@ from typing import Any
|
||||
|
||||
import gguf
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from gguf import GGMLQuantizationType as WeightType
|
||||
from torch.nn.parameter import Parameter, UninitializedParameter
|
||||
|
||||
@@ -234,7 +235,7 @@ try:
|
||||
op_func=_fused_mul_mat_gguf,
|
||||
fake_impl=_fused_mul_mat_gguf_fake,
|
||||
)
|
||||
fused_mul_mat_gguf = torch.ops.vllm._fused_mul_mat_gguf
|
||||
fused_mul_mat_gguf = _fused_mul_mat_gguf
|
||||
|
||||
except AttributeError as error:
|
||||
raise error
|
||||
@@ -365,7 +366,7 @@ try:
|
||||
op_func=_fused_moe_gguf,
|
||||
fake_impl=_fused_moe_gguf_fake,
|
||||
)
|
||||
fused_moe_gguf = torch.ops.vllm._fused_moe_gguf
|
||||
fused_moe_gguf = _fused_moe_gguf
|
||||
|
||||
except AttributeError as error:
|
||||
raise error
|
||||
@@ -410,7 +411,7 @@ try:
|
||||
op_func=_apply_gguf_embedding,
|
||||
fake_impl=_apply_gguf_embedding_fake,
|
||||
)
|
||||
apply_gguf_embedding = torch.ops.vllm._apply_gguf_embedding
|
||||
apply_gguf_embedding = _apply_gguf_embedding
|
||||
|
||||
except AttributeError as error:
|
||||
raise error
|
||||
@@ -451,6 +452,9 @@ class GGUFLinearMethod(LinearMethodBase):
|
||||
"data_container": [],
|
||||
"shard_id": [],
|
||||
"shard_id_map": {},
|
||||
"params_dtype": params_dtype,
|
||||
"input_size_per_partition" :input_size_per_partition, # restore shape for qkv and merge
|
||||
"output_partition_sizes" :output_partition_sizes,
|
||||
},
|
||||
)
|
||||
set_weight_attrs(qweight, extra_weight_attrs)
|
||||
@@ -664,6 +668,10 @@ class GGUFEmbeddingMethod(GGUFLinearMethod):
|
||||
"""
|
||||
|
||||
def embedding(self, layer: torch.nn.Module, x: torch.Tensor) -> torch.Tensor:
|
||||
weight = layer.weight
|
||||
return F.embedding(x, weight)
|
||||
|
||||
def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
|
||||
qweight = layer.qweight
|
||||
qweight_type = layer.qweight_type.weight_type
|
||||
hidden_size = qweight.tensor_shape[1]
|
||||
|
||||
Reference in New Issue
Block a user