Migrate XTorch operations to Kunlun operations (accelerating iteration) (#177)

Signed-off-by: dongxinyu03 <dongxinyu03@baidu.com>
2026-02-12 18:13:00 +08:00
parent 744719587e
commit bf9369f733
15 changed files with 125 additions and 119 deletions
--- a/vllm_kunlun/ops/activation.py
+++ b/vllm_kunlun/ops/activation.py
@@ -93,7 +93,7 @@ class SiluAndMul(CustomOp):

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        """forward_cuda"""
-        import xtorch_ops
+        import kunlun_ops
        
        d = x.shape[-1] // 2
        output_shape = (x.shape[:-1] + (d, ))
@@ -103,7 +103,7 @@ class SiluAndMul(CustomOp):

    def forward_kunlun(self, x: torch.Tensor) -> torch.Tensor:
        """forward_kunlun"""
-        import xtorch_ops
+        import kunlun_ops
        
        d = x.shape[-1] // 2
        output_shape = (x.shape[:-1] + (d, ))
@@ -251,14 +251,14 @@ class GeluAndMul(CustomOp):
            无。
        """
        # from vllm import _custom_ops as ops
-        import xtorch_ops
+        import kunlun_ops
        # d = x.shape[-1] // 2
        # output_shape = (x.shape[:-1] + (d, ))
        out = torch.empty(x, dtype=x.dtype, device=x.device)
        if self.approximate == "none":
            # ops.gelu_and_mul(out, x)
            print(x,x.shape)
-            xtorch_ops.gelu(x, out)
+            kunlun_ops.gelu(x, out)
        elif self.approximate == "tanh":
            ops.gelu_tanh_and_mul(out, x)
        return out