From 01d47a27b6f6af1620b1146804208b269618e2a2 Mon Sep 17 00:00:00 2001
From: chenxu140
Date: Wed, 20 Aug 2025 01:09:48 +0800
Subject: [PATCH] [Bugfix] fix kv buffer register & dp attention & deepepmoe (#9327)

---
 python/sglang/srt/disaggregation/ascend/conn.py | 4 +---
 python/sglang/srt/layers/dp_attention.py        | 2 +-
 python/sglang/srt/layers/moe/ep_moe/layer.py    | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/python/sglang/srt/disaggregation/ascend/conn.py b/python/sglang/srt/disaggregation/ascend/conn.py
index 504212e0a..3e988c0a4 100644
--- a/python/sglang/srt/disaggregation/ascend/conn.py
+++ b/python/sglang/srt/disaggregation/ascend/conn.py
@@ -23,9 +23,7 @@ class AscendKVManager(MooncakeKVManager):
         )
 
     def register_buffer_to_engine(self):
-        self.engine.register(
-            self.kv_args.kv_data_ptrs[0], sum(self.kv_args.kv_data_lens)
-        )
+        self.engine.batch_register(self.kv_args.kv_data_ptrs, self.kv_args.kv_data_lens)
         # The Ascend backend optimize batch registration for small memory blocks.
         self.engine.batch_register(
             self.kv_args.aux_data_ptrs, self.kv_args.aux_data_lens
diff --git a/python/sglang/srt/layers/dp_attention.py b/python/sglang/srt/layers/dp_attention.py
index 58f6e0f9c..1250636eb 100644
--- a/python/sglang/srt/layers/dp_attention.py
+++ b/python/sglang/srt/layers/dp_attention.py
@@ -234,7 +234,7 @@ def initialize_dp_attention(
     _DpGatheredBufferWrapper.set_metadata(
         hidden_size=model_config.hidden_size,
         dtype=model_config.dtype,
-        device=torch.device("cuda"),
+        device=torch.device(server_args.device),
     )
 
 
diff --git a/python/sglang/srt/layers/moe/ep_moe/layer.py b/python/sglang/srt/layers/moe/ep_moe/layer.py
index 32684c606..97e16a90e 100644
--- a/python/sglang/srt/layers/moe/ep_moe/layer.py
+++ b/python/sglang/srt/layers/moe/ep_moe/layer.py
@@ -736,7 +736,7 @@ class DeepEPMoE(EPMoE):
         assert isinstance(dispatch_output, AscendDeepEPLLOutput)
         hidden_states, topk_idx, topk_weights, _, seg_indptr, _ = dispatch_output
         assert self.quant_method is not None
-        assert self.activation == "silu"
+        assert self.moe_runner_config.activation == "silu"
         # NOTE: Ascend's Dispatch & Combine does not support FP16
         output_dtype = torch.bfloat16
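
Editor's note (not part of the patch): the first hunk replaces a single register() call, which covered only the first KV pointer together with the summed lengths, with batch_register() over every (pointer, length) pair. The sketch below is a minimal illustration of that difference only; the _Engine class, the pointer values, and the print statements are hypothetical, and the contiguity rationale in the comments is a plausible reading of the bug rather than something stated in the patch.

    # Hypothetical stand-in for the transfer engine, used only to illustrate the
    # registration change; the real engine in sglang/Mooncake takes more setup.
    class _Engine:
        def register(self, ptr, length):
            print(f"register 0x{ptr:x} len={length}")

        def batch_register(self, ptrs, lengths):
            # Register every (pointer, length) pair individually.
            for ptr, length in zip(ptrs, lengths):
                self.register(ptr, length)

    # Illustrative per-layer KV buffers (addresses and sizes are made up).
    kv_data_ptrs = [0x1000, 0x9000, 0x11000]
    kv_data_lens = [0x800, 0x800, 0x800]

    engine = _Engine()
    # Old behaviour: only the first pointer is registered, with the total length,
    # which presumably breaks when the KV buffers are not one contiguous region.
    engine.register(kv_data_ptrs[0], sum(kv_data_lens))
    # Fixed behaviour: each KV buffer is registered on its own.
    engine.batch_register(kv_data_ptrs, kv_data_lens)

The other two hunks follow the same corrective pattern: the DP-attention buffer now uses torch.device(server_args.device) instead of a hard-coded "cuda", and the DeepEPMoE assertion reads the activation from moe_runner_config rather than a no-longer-present self.activation attribute.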