upgrade torch npu version (#4433)
The vLLM graph feature now relies on torch >= 2.8. To make graph mode work, we need to upgrade the torch version as well. Upgrading torch to a newer release is also worthwhile for long-term support. Related vLLM change: https://github.com/vllm-project/vllm/pull/25110 - vLLM version: v0.11.2 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2
This commit is contained in:
@@ -119,8 +119,10 @@ class AscendW8A8LinearMethod:
|
||||
weight=layer.weight,
|
||||
start_flag=x,
|
||||
)
|
||||
|
||||
quant_comm_config = getattr(layer, "_quant_comm_config", {})
|
||||
try:
|
||||
quant_comm_config = getattr(layer, "_quant_comm_config")
|
||||
except AttributeError:
|
||||
quant_comm_config = {}
|
||||
comm_fn = quant_comm_config.get("communication_fn")
|
||||
enable_flashcomm2_quant_comm = comm_fn is not None and (
|
||||
"o_proj" in layer.prefix or "out_proj" in layer.prefix)
|
||||
@@ -151,8 +153,12 @@ class AscendW8A8LinearMethod:
|
||||
)
|
||||
|
||||
quant_bias = layer.quant_bias if tp_rank == 0 else None
|
||||
if getattr(layer, "ascend_quant_method",
|
||||
"") == COMPRESSED_TENSORS_METHOD:
|
||||
|
||||
try:
|
||||
ascend_quant_method = getattr(layer, "ascend_quant_method")
|
||||
except AttributeError:
|
||||
ascend_quant_method = ""
|
||||
if ascend_quant_method == COMPRESSED_TENSORS_METHOD:
|
||||
quant_bias = bias
|
||||
|
||||
if get_ascend_device_type() == AscendDeviceType._310P:
|
||||
@@ -194,8 +200,13 @@ class AscendW8A8LinearMethod:
|
||||
layer.weight_scale.data = torch.flatten(layer.weight_scale.data)
|
||||
layer.weight_offset.data = torch.flatten(layer.weight_offset.data)
|
||||
layer.bias.data = layer.bias.data.to(layer.weight_scale.data.dtype)
|
||||
if getattr(layer, "ascend_quant_method",
|
||||
"") == COMPRESSED_TENSORS_METHOD:
|
||||
|
||||
try:
|
||||
ascend_quant_method = getattr(layer, "ascend_quant_method")
|
||||
except AttributeError:
|
||||
ascend_quant_method = ""
|
||||
|
||||
if ascend_quant_method == COMPRESSED_TENSORS_METHOD:
|
||||
deq_scale = layer.input_scale.data * layer.weight_scale.data
|
||||
layer.deq_scale = torch.nn.Parameter(deq_scale,
|
||||
requires_grad=False)
|
||||
|
||||
Reference in New Issue
Block a user