add dispath_ffn_combine_bf16 (#5866)

### What this PR does / why we need it? add dispath_ffn_combine_bf16 - vLLM version: v0.13.0 - vLLM main: bde38c11df --------- Signed-off-by: guanguan0308 <1546542263@qq.com>
2026-01-21 09:30:30 +08:00
parent bec8641876
commit 1ed9524763
45 changed files with 8420 additions and 1 deletions
--- a/csrc/torch_binding.cpp
+++ b/csrc/torch_binding.cpp
@@ -738,7 +738,9 @@ at::Tensor& dispatch_ffn_combine(
    at::Tensor& out
 ) {
    char *group_ep_ptr = const_cast<char *>(group.data());
-    EXEC_NPU_CMD(aclnnDispatchFFNCombine,
+    bool is_int8 = weight1[0].dtype() == at::kChar;
+    if (is_int8) {
+        EXEC_NPU_CMD(aclnnDispatchFFNCombine,
                 x,
                 weight1,
                 weight2,
@@ -749,6 +751,19 @@ at::Tensor& dispatch_ffn_combine(
                 group_ep_ptr,
                 max_output_size,
                 out);
+    } else {
+        EXEC_NPU_CMD(aclnnDispatchFFNCombineBF16,
+                 x,
+                 weight1,
+                 weight2,
+                 expert_idx,
+                 scale1,
+                 scale2,
+                 probs,
+                 group_ep_ptr,
+                 max_output_size,
+                 out);
+    }    
    return out;
 }