diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py
index 575d3acf..1b346de6 100644
--- a/vllm_ascend/patch/__init__.py
+++ b/vllm_ascend/patch/__init__.py
@@ -104,29 +104,7 @@
 #   Future Plan:
 #       Remove this patch when vllm merged them.
 #
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#   1. `vllm.v1.sample.sampler.Sampler.gather_logprobs`
-#    Why:
-#       We need to patch gather_logprobs to make sure call batched_count_greater_than
-#       with backend=current_platform.simple_compile_backend
-#    How:
-#       Patch gather_logprobs call new batched_count_greater_than
-#    Related PR (if no, explain why):
-#       - https://github.com/vllm-project/vllm/pull/21591
-#    Future Plan:
-#       Revert it when vLLM merge #21591 and release new version
-# ** File: worker/patch_logits.py **
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#   1. `vllm._custom_ops.apply_repetition_penalties`
-#    Why:
-#       apply_repetition_penalties in vLLM use tensor.is_cuda to check if tensor is on cuda. But the value is always True
-#       on ascend, thus we need to patch apply_repetition_penalties.
-#    How:
-#       Remove the related cuda check in apply_repetition_penalties.
-#    Related PR (if no, explain why):
-#       - this is a bug by Ascend only. It can' be fixed in vLLM.
-#    Future Plan:
-#       Fix this bug in torch-npu, bump torch-npu version and remove this patch.
+# ** File: worker/patch_roberta.py **
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #   1. `vllm.model_executor.models.roberta.RobertaEmbedding.forward`
 #    Why:
diff --git a/vllm_ascend/patch/worker/__init__.py b/vllm_ascend/patch/worker/__init__.py
index 43a2e800..a361789f 100644
--- a/vllm_ascend/patch/worker/__init__.py
+++ b/vllm_ascend/patch/worker/__init__.py
@@ -23,7 +23,6 @@ if HAS_TRITON:
 # isort: off
 import vllm_ascend.patch.platform.patch_sched_yield  # noqa
 import vllm_ascend.patch.worker.patch_distributed  # noqa
-import vllm_ascend.patch.worker.patch_logits  # noqa
 import vllm_ascend.patch.worker.patch_roberta  # noqa
 import vllm_ascend.patch.worker.patch_weight_loader  # noqa
 import vllm_ascend.patch.worker.patch_multimodal_merge  # noqa
diff --git a/vllm_ascend/patch/worker/patch_logits.py b/vllm_ascend/patch/worker/patch_logits.py
deleted file mode 100644
index 84a92f91..00000000
--- a/vllm_ascend/patch/worker/patch_logits.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-import vllm
-from vllm._custom_ops import apply_repetition_penalties_torch
-
-
-def apply_repetition_penalties(logits: torch.Tensor, prompt_mask: torch.Tensor,
-                               output_mask: torch.Tensor,
-                               repetition_penalties: torch.Tensor) -> None:
-    """Apply repetition penalties to logits in-place.
-
-    Args:
-        logits: The logits tensor of shape [num_seqs, vocab_size].
-        prompt_mask: A boolean tensor indicating which tokens appear in the prompt.
-        output_mask: A boolean tensor indicating which tokens appear in the output.
-        repetition_penalties: The repetition penalties of shape (num_seqs, ).
-    """
-    apply_repetition_penalties_torch(logits, prompt_mask, output_mask,
-                                     repetition_penalties)
-
-
-# NPU device type tensors have attributes is_cuda=True and is_npu=True, according to its implementation in
-# https://github.com/Ascend/pytorch/blob/863b9071cbdf47023c12c246e3efa9c6e2285fc6/torch_npu/npu/_stream_check.py#L74
-# This causes that vLLM's apply_repetition_penalties function will run into the branch of "if logits.is_cuda" and
-# call the custom op implemented in CUDA, which is not compatible with NPU.
-# Reference: https://github.com/vllm-project/vllm/blob/f66673a39d9f364194c249f28098cad8a5584ccb/vllm/_custom_ops.py#L314
-vllm._custom_ops.apply_repetition_penalties = apply_repetition_penalties
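For reviewers who want to confirm the removal is safe on their setup, below is a minimal, hypothetical sanity check that is not part of this diff. It assumes an Ascend environment with `torch_npu` installed and a torch-npu build in which NPU tensors no longer report `is_cuda=True`, so vLLM's stock `vllm._custom_ops.apply_repetition_penalties` should take its non-CUDA fallback and still penalize masked tokens in place. Shapes, token indices, and the penalty value are illustrative.

```python
# Hypothetical sanity check: verify that the unpatched vLLM op works on NPU.
import torch
import torch_npu  # noqa: F401  # assumed available; registers the "npu" device
import vllm._custom_ops as ops

num_seqs, vocab_size = 2, 16
logits = torch.zeros(num_seqs, vocab_size, device="npu")
logits[0, 3] = 2.0  # a positive logit for the token we will penalize

prompt_mask = torch.zeros(num_seqs, vocab_size, dtype=torch.bool, device="npu")
output_mask = torch.zeros(num_seqs, vocab_size, dtype=torch.bool, device="npu")
output_mask[0, 3] = True  # pretend sequence 0 already generated token 3
repetition_penalties = torch.full((num_seqs,), 1.2, device="npu")

# With the monkey patch removed, this should dispatch to the torch fallback on NPU
# (logits.is_cuda is expected to be False) and modify logits in place.
ops.apply_repetition_penalties(logits, prompt_mask, output_mask, repetition_penalties)

assert logits[0, 3] < 2.0  # the penalized positive logit should shrink
assert torch.equal(logits[1], torch.zeros(vocab_size, device="npu"))  # untouched row
print("apply_repetition_penalties works on NPU without the monkey patch")
```

If a given torch-npu version still routes this call into the CUDA-only branch, the removed `patch_logits.py` shown above can be re-applied locally as a stopgap until the torch-npu fix lands.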