Overlap two kernels in DeepSeek with communication (#6711)
This commit is contained in:
@@ -127,9 +127,9 @@ def _compute_moe_deepseek_blog_decode(layer):
 layer.mlp.op_combine_a,
 operations.YieldOperation(),
 layer.mlp.op_combine_b,
+operations.YieldOperation(),
 layer.mlp.op_output,
 layer.op_comm_postprocess_layer,
-operations.YieldOperation(),
 ],
 )
 
Reference in New Issue
Block a user