[Feature] support deepseek v3/r1/v3.2 (#78)
* [Feature] support deepseek v3/r1/v3.2
* fix gpt_oss
* update readme
* update readme

---------

Co-authored-by: hanhaowen <hanhaowen@baidu.com>
This commit is contained in:
@@ -80,6 +80,14 @@ def register_model():
|
||||
ModelRegistry.register_model(
|
||||
"GptOssForCausalLM",
|
||||
"vllm_kunlun.models.gpt_oss:GptOssForCausalLM")
|
||||
|
||||
|
||||
ModelRegistry.register_model(
|
||||
"DeepseekV3ForCausalLM",
|
||||
"vllm_kunlun.models.deepseek_v2:DeepseekV3ForCausalLM")
|
||||
|
||||
ModelRegistry.register_model(
|
||||
"DeepseekV32ForCausalLM",
|
||||
"vllm_kunlun.models.deepseek_v2:DeepseekV3ForCausalLM")
|
||||
|
||||
def register_quant_method():
|
||||
"""to do"""
|
||||
|
||||
1445
vllm_kunlun/models/deepseek_v2.py
Normal file
1445
vllm_kunlun/models/deepseek_v2.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -16,7 +16,7 @@ from vllm.distributed import (get_ep_group, get_pp_group,
|
||||
get_tensor_model_parallel_rank,
|
||||
get_tensor_model_parallel_world_size,
|
||||
tensor_model_parallel_all_gather)
|
||||
from vllm_kunlun.ops.fused_moe.layer import FusedMoE
|
||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||
from vllm.model_executor.layers.layernorm import RMSNorm
|
||||
from vllm.model_executor.layers.linear import (QKVParallelLinear,
|
||||
RowParallelLinear)
|
||||
@@ -176,7 +176,7 @@ class MLPBlock(torch.nn.Module):
|
||||
x = sequence_parallel_chunk(x)
|
||||
|
||||
g = self.router(x)
|
||||
x = self.experts(hidden_states=x, router_logits=g, linear_weights=self.router.weight)
|
||||
x = self.experts(hidden_states=x, router_logits=g)
|
||||
|
||||
if self.is_sequence_parallel:
|
||||
x = tensor_model_parallel_all_gather(x.contiguous(), 0)
|
||||
|
||||
@@ -21,7 +21,7 @@ from vllm.distributed import (
|
||||
tensor_model_parallel_all_gather,
|
||||
)
|
||||
from vllm.logger import init_logger
|
||||
from vllm_kunlun.ops.fused_moe.layer import FusedMoE
|
||||
from vllm.model_executor.layers.fused_moe import FusedMoE
|
||||
from vllm.model_executor.layers.layernorm import RMSNorm
|
||||
from vllm.model_executor.layers.linear import (
|
||||
MergedColumnParallelLinear,
|
||||
@@ -185,8 +185,7 @@ class MiMoV2MoE(nn.Module):
|
||||
gate_input = hidden_states
|
||||
router_logits = self.gate(gate_input)
|
||||
final_hidden_states = self.experts(
|
||||
hidden_states=hidden_states, router_logits=router_logits, linear_weights=self.gate.weight
|
||||
)
|
||||
hidden_states=hidden_states, router_logits=router_logits)
|
||||
|
||||
return final_hidden_states.squeeze(0) if is_input_1d else final_hidden_states
|
||||
|
||||
|
||||
Reference in New Issue
Block a user