Update to new version of base image
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -4,7 +4,10 @@ from typing import List, Optional, Tuple
|
||||
import torch
|
||||
|
||||
from vllm import _custom_ops as ops
|
||||
from vllm.attention.ops.prefix_prefill import context_attention_fwd
|
||||
from vllm.triton_utils import HAS_TRITON
|
||||
|
||||
if HAS_TRITON:
|
||||
from vllm.attention.ops.prefix_prefill import context_attention_fwd
|
||||
|
||||
# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`.
|
||||
_PARTITION_SIZE = 512
|
||||
|
||||
@@ -808,8 +808,6 @@ if triton.__version__ >= "2.1.0":
|
||||
)
|
||||
return
|
||||
|
||||
import time
|
||||
ts_beg = time.time()
|
||||
_fwd_kernel[grid](
|
||||
q,
|
||||
k,
|
||||
@@ -860,6 +858,4 @@ if triton.__version__ >= "2.1.0":
|
||||
num_warps=NUM_WARPS,
|
||||
num_stages=1,
|
||||
)
|
||||
elapsed = time.time() - ts_beg
|
||||
#print(f'{elapsed}: {BLOCK=}, {Lk=}, {Lk_padded=}, {BLOCK=}, {sliding_window=}, {NUM_WARPS=}')
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user