Tiny remove comments about DeepEP on H20 (#7234)
This commit is contained in:
@@ -542,38 +542,6 @@ class _DeepEPDispatcherImplLowLatency(_DeepEPDispatcherImplBase):
|
|||||||
topk_idx: torch.Tensor,
|
topk_idx: torch.Tensor,
|
||||||
use_fp8: bool = False,
|
use_fp8: bool = False,
|
||||||
):
|
):
|
||||||
"""
|
|
||||||
# On H20, a CUDA error occurs: DeepEP/csrc/kernels/internode_ll.cu:337 'too many blocks in cooperative launch'.
|
|
||||||
# To avoid it, first apply the change below to dispatch / combine in DeepEP's internode_ll.cu, then reinstall DeepEP.
|
|
||||||
# For more details, see: https://github.com/deepseek-ai/DeepEP/issues/15#issuecomment-2709715782
|
|
||||||
|
|
||||||
diff --git a/csrc/kernels/internode_ll.cu b/csrc/kernels/internode_ll.cu
|
|
||||||
index 76ae2e2..8ecd08f 100644
|
|
||||||
--- a/csrc/kernels/internode_ll.cu
|
|
||||||
+++ b/csrc/kernels/internode_ll.cu
|
|
||||||
@@ -310,8 +310,8 @@ void dispatch(void* packed_recv_x, float* packed_recv_x_scales,
|
|
||||||
int num_topk, int num_experts, int rank, int num_ranks, bool use_fp8,
|
|
||||||
void* workspace, cudaStream_t stream, int phases) {
|
|
||||||
constexpr int kNumMaxTopK = 9;
|
|
||||||
- constexpr int kNumWarpsPerGroup = 10;
|
|
||||||
- constexpr int kNumWarpGroups = 3;
|
|
||||||
+ constexpr int kNumWarpsPerGroup = 8;
|
|
||||||
+ constexpr int kNumWarpGroups = 4;
|
|
||||||
EP_STATIC_ASSERT(kNumMaxTopK + 1 <= kNumWarpGroups * kNumWarpsPerGroup, "Too many top-k selections");
|
|
||||||
|
|
||||||
const auto num_warps = kNumWarpGroups * kNumWarpsPerGroup;
|
|
||||||
@@ -501,8 +501,8 @@ void combine(void* combined_x,
|
|
||||||
int num_combined_tokens, int hidden, int num_max_dispatch_tokens_per_rank,
|
|
||||||
int num_topk, int num_experts, int rank, int num_ranks,
|
|
||||||
void* workspace, cudaStream_t stream, int phases) {
|
|
||||||
- constexpr int kNumWarpsPerGroup = 10;
|
|
||||||
- constexpr int kNumWarpGroups = 3;
|
|
||||||
+ constexpr int kNumWarpsPerGroup = 8;
|
|
||||||
+ constexpr int kNumWarpGroups = 4;
|
|
||||||
constexpr int kNumMaxTopk = 9;
|
|
||||||
|
|
||||||
const auto num_warps = kNumWarpGroups * kNumWarpsPerGroup;
|
|
||||||
"""
|
|
||||||
buffer = self._get_buffer()
|
buffer = self._get_buffer()
|
||||||
packed_recv_hidden, packed_recv_count, self.handle, event, hook = (
|
packed_recv_hidden, packed_recv_count, self.handle, event, hook = (
|
||||||
buffer.low_latency_dispatch(
|
buffer.low_latency_dispatch(
|
||||||
|
|||||||
Reference in New Issue
Block a user