fix some typos (#6209)

Co-authored-by: Brayden Zhong <b8zhong@uwaterloo.ca>
This commit is contained in:
applesaucethebun
2025-05-12 13:42:38 -04:00
committed by GitHub
parent 3ee40ff919
commit d738ab52f8
95 changed files with 276 additions and 276 deletions

View File

@@ -664,7 +664,7 @@ class FlashInferIndicesUpdaterDecode:
kv_indptr = kv_indptr[: bs + 1]
if wrapper.is_cuda_graph_enabled:
- # Directly write to the cuda graph input buffer
+ # Directly write to the CUDA graph input buffer
kv_indices = wrapper._paged_kv_indices_buf
else:
kv_indices = torch.empty(
@@ -1173,7 +1173,7 @@ def fast_decode_plan(
"""
A faster version of BatchDecodeWithPagedKVCacheWrapper::plan used for FlashInferMultiStepDraftBackend.
Modifications:
- - Remove unnecessary device-to-device copy for the cuda graph buffers.
+ - Remove unnecessary device-to-device copy for the CUDA graph buffers.
- Remove unnecessary host-to-device copy for the metadata buffers.
"""
batch_size = len(last_page_len)