init v0.11.0rc0

This commit is contained in:
2025-10-14 10:38:28 +08:00
parent 67afd0ea78
commit 66dc16f966
278 changed files with 28130 additions and 11708 deletions

View File

@@ -165,6 +165,11 @@ def register_torchair_model():
"vllm_ascend.torchair.models.torchair_deepseek_v3:TorchairDeepseekV3ForCausalLM"
)
ModelRegistry.register_model(
"DeepseekV32ForCausalLM",
"vllm_ascend.torchair.models.torchair_deepseek_v3:TorchairDeepseekV3ForCausalLM"
)
ModelRegistry.register_model(
"Qwen2ForCausalLM",
"vllm_ascend.torchair.models.qwen2:CustomQwen2ForCausalLM")
@@ -180,20 +185,31 @@ def register_torchair_model():
def torchair_quant_method_register():
    """Point the Ascend quantization registries at the torchair classes.

    Overrides the ``W8A8_DYNAMIC`` and ``W4A8_DYNAMIC`` entries of
    ``SUPPORT_ASCEND_QUANTIZER_TYPE`` with the torchair quantizer classes,
    and the ``linear`` / ``moe`` entries of ``ASCEND_QUANTIZATION_METHOD_MAP``
    for the same two schemes with the torchair method classes. Both
    registries are modified in place and nothing is returned.

    All imports are performed inside the function, i.e. at call time.
    """
    from vllm_ascend.quantization.quantizer import (
        SUPPORT_ASCEND_QUANTIZER_TYPE)
    from vllm_ascend.torchair.quantization.torchair_quantizer import (
        TorchairW4A8DYNAMICQuantizer, TorchairW8A8DYNAMICQuantizer)
    from vllm_ascend.quantization.utils import ASCEND_QUANTIZATION_METHOD_MAP
    from vllm_ascend.torchair.quantization.torchair_w4a8_dynamic import (
        TorchairAscendW4A8DynamicFusedMoEMethod,
        TorchairAscendW4A8DynamicLinearMethod)
    from vllm_ascend.torchair.quantization.torchair_w8a8_dynamic import (
        TorchairAscendW8A8DynamicFusedMoEMethod,
        TorchairAscendW8A8DynamicLinearMethod)

    # NOTE(review): both the quantizer registry and the per-scheme method map
    # are patched here -- confirm that both registries are still required.

    # (scheme name, torchair quantizer class), applied in this order.
    quantizer_overrides = (
        ("W8A8_DYNAMIC", TorchairW8A8DYNAMICQuantizer),
        ("W4A8_DYNAMIC", TorchairW4A8DYNAMICQuantizer),
    )
    for scheme, quantizer_cls in quantizer_overrides:
        SUPPORT_ASCEND_QUANTIZER_TYPE[scheme] = quantizer_cls

    # (scheme name, layer kind, torchair method class), applied in this order.
    # Plain indexing is used on the outer map, so the scheme entries must
    # already exist in ASCEND_QUANTIZATION_METHOD_MAP.
    method_overrides = (
        ("W8A8_DYNAMIC", "linear", TorchairAscendW8A8DynamicLinearMethod),
        ("W8A8_DYNAMIC", "moe", TorchairAscendW8A8DynamicFusedMoEMethod),
        ("W4A8_DYNAMIC", "linear", TorchairAscendW4A8DynamicLinearMethod),
        ("W4A8_DYNAMIC", "moe", TorchairAscendW4A8DynamicFusedMoEMethod),
    )
    for scheme, layer_kind, method_cls in method_overrides:
        ASCEND_QUANTIZATION_METHOD_MAP[scheme][layer_kind] = method_cls
def torchair_ops_patch():
from vllm_ascend.ops.activation import AscendSiluAndMul
from vllm_ascend.ops.layernorm import AscendRMSNorm
from vllm_ascend.ops.rotary_embedding import (
AscendDeepseekScalingRotaryEmbedding, AscendRotaryEmbedding)
from vllm_ascend.torchair.ops import (torchair_activation,
torchair_layernorm)
from vllm_ascend.torchair.ops.torchair_rotary_embedding import (
deepseek_rope_init_func, native_rope_deepseek_forward,
qwen_rope_init_func, rope_forward)
@@ -203,3 +219,6 @@ def torchair_ops_patch():
AscendDeepseekScalingRotaryEmbedding.__init__ = deepseek_rope_init_func # type: ignore[method-assign]
AscendDeepseekScalingRotaryEmbedding.forward = native_rope_deepseek_forward # type: ignore[method-assign]
AscendRMSNorm.forward_oot = torchair_layernorm.torchair_rmsnorm_forward_oot # type: ignore[method-assign]
AscendSiluAndMul.forward_oot = torchair_activation.torchair_silu_and_mul_forward_oot # type: ignore[method-assign]