From 1b40665548f048c3417b79207bb6f1a930475624 Mon Sep 17 00:00:00 2001 From: 22dimensions Date: Fri, 15 Aug 2025 10:27:43 +0800 Subject: [PATCH] [Misc] remove unused file (cache.py) (#2377) ### What this PR does / why we need it? cache.py only contains a function that will never be called, so remove it. ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/f1f0d2fab8a1b1ced68ccf5b7197393cf01e1a02 Signed-off-by: 22dimensions --- vllm_ascend/ops/cache.py | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 vllm_ascend/ops/cache.py diff --git a/vllm_ascend/ops/cache.py b/vllm_ascend/ops/cache.py deleted file mode 100644 index d4bd08bc..00000000 --- a/vllm_ascend/ops/cache.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. -# This file is a part of the vllm-ascend project. -# Adapted from vllm/tests/kernels/test_moe.py -# Copyright 2023 The vLLM team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - - -def concat_and_cache_mla( - kv_c_normed: torch.Tensor, # [num_tokens, num_kv_head, nope] - k_pe: torch.Tensor, # [num_tokens, num_kv_head, rope] - kv_cache: torch. - Tensor, # [num_blocks, block_size, num_kv_head, nope + rope] - slot_mapping, # [num_tokens] -): - num_blocks = kv_cache.size()[0] - block_size = kv_cache.size()[1] - num_kv_head = k_pe.size()[1] - - idx_for_copy = slot_mapping // block_size * block_size + slot_mapping % block_size - kv_cache = kv_cache.view(num_blocks * block_size, num_kv_head, -1) - kv_cache[idx_for_copy] = torch.cat([kv_c_normed.unsqueeze(1), k_pe], - dim=-1)