init v0.11.0rc0
This commit is contained in:
@@ -165,6 +165,11 @@ def register_torchair_model():
|
||||
"vllm_ascend.torchair.models.torchair_deepseek_v3:TorchairDeepseekV3ForCausalLM"
|
||||
)
|
||||
|
||||
ModelRegistry.register_model(
|
||||
"DeepseekV32ForCausalLM",
|
||||
"vllm_ascend.torchair.models.torchair_deepseek_v3:TorchairDeepseekV3ForCausalLM"
|
||||
)
|
||||
|
||||
ModelRegistry.register_model(
|
||||
"Qwen2ForCausalLM",
|
||||
"vllm_ascend.torchair.models.qwen2:CustomQwen2ForCausalLM")
|
||||
@@ -180,20 +185,31 @@ def register_torchair_model():
|
||||
|
||||
|
||||
def torchair_quant_method_register():
|
||||
from vllm_ascend.quantization.quantizer import \
|
||||
SUPPORT_ASCEND_QUANTIZER_TYPE
|
||||
from vllm_ascend.torchair.quantization.torchair_quantizer import (
|
||||
TorchairW4A8DYNAMICQuantizer, TorchairW8A8DYNAMICQuantizer)
|
||||
from vllm_ascend.quantization.utils import ASCEND_QUANTIZATION_METHOD_MAP
|
||||
from vllm_ascend.torchair.quantization.torchair_w4a8_dynamic import (
|
||||
TorchairAscendW4A8DynamicFusedMoEMethod,
|
||||
TorchairAscendW4A8DynamicLinearMethod)
|
||||
from vllm_ascend.torchair.quantization.torchair_w8a8_dynamic import (
|
||||
TorchairAscendW8A8DynamicFusedMoEMethod,
|
||||
TorchairAscendW8A8DynamicLinearMethod)
|
||||
|
||||
SUPPORT_ASCEND_QUANTIZER_TYPE[
|
||||
"W8A8_DYNAMIC"] = TorchairW8A8DYNAMICQuantizer
|
||||
SUPPORT_ASCEND_QUANTIZER_TYPE[
|
||||
"W4A8_DYNAMIC"] = TorchairW4A8DYNAMICQuantizer
|
||||
ASCEND_QUANTIZATION_METHOD_MAP["W8A8_DYNAMIC"][
|
||||
"linear"] = TorchairAscendW8A8DynamicLinearMethod
|
||||
ASCEND_QUANTIZATION_METHOD_MAP["W8A8_DYNAMIC"][
|
||||
"moe"] = TorchairAscendW8A8DynamicFusedMoEMethod
|
||||
ASCEND_QUANTIZATION_METHOD_MAP["W4A8_DYNAMIC"][
|
||||
"linear"] = TorchairAscendW4A8DynamicLinearMethod
|
||||
ASCEND_QUANTIZATION_METHOD_MAP["W4A8_DYNAMIC"][
|
||||
"moe"] = TorchairAscendW4A8DynamicFusedMoEMethod
|
||||
|
||||
|
||||
def torchair_ops_patch():
|
||||
from vllm_ascend.ops.activation import AscendSiluAndMul
|
||||
from vllm_ascend.ops.layernorm import AscendRMSNorm
|
||||
from vllm_ascend.ops.rotary_embedding import (
|
||||
AscendDeepseekScalingRotaryEmbedding, AscendRotaryEmbedding)
|
||||
from vllm_ascend.torchair.ops import (torchair_activation,
|
||||
torchair_layernorm)
|
||||
from vllm_ascend.torchair.ops.torchair_rotary_embedding import (
|
||||
deepseek_rope_init_func, native_rope_deepseek_forward,
|
||||
qwen_rope_init_func, rope_forward)
|
||||
@@ -203,3 +219,6 @@ def torchair_ops_patch():
|
||||
|
||||
AscendDeepseekScalingRotaryEmbedding.__init__ = deepseek_rope_init_func # type: ignore[method-assign]
|
||||
AscendDeepseekScalingRotaryEmbedding.forward = native_rope_deepseek_forward # type: ignore[method-assign]
|
||||
|
||||
AscendRMSNorm.forward_oot = torchair_layernorm.torchair_rmsnorm_forward_oot # type: ignore[method-assign]
|
||||
AscendSiluAndMul.forward_oot = torchair_activation.torchair_silu_and_mul_forward_oot # type: ignore[method-assign]
|
||||
|
||||
Reference in New Issue
Block a user