Update to new version of base image

This commit is contained in:
2025-10-24 15:45:06 +08:00
parent ee04aead1e
commit fad74b701b
476 changed files with 1270 additions and 46 deletions

View File

@@ -4,7 +4,10 @@ from typing import List, Optional, Tuple
import torch
from vllm import _custom_ops as ops
from vllm.attention.ops.prefix_prefill import context_attention_fwd
from vllm.triton_utils import HAS_TRITON
if HAS_TRITON:
    from vllm.attention.ops.prefix_prefill import context_attention_fwd
# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`.
_PARTITION_SIZE = 512

View File

@@ -808,8 +808,6 @@ if triton.__version__ >= "2.1.0":
)
return
import time
ts_beg = time.time()
_fwd_kernel[grid](
q,
k,
@@ -860,6 +858,4 @@ if triton.__version__ >= "2.1.0":
num_warps=NUM_WARPS,
num_stages=1,
)
elapsed = time.time() - ts_beg
#print(f'{elapsed}: {BLOCK=}, {Lk=}, {Lk_padded=}, {BLOCK=}, {sliding_window=}, {NUM_WARPS=}')
return