Update to new version of base image
This commit is contained in:
10
Dockerfile
10
Dockerfile
@@ -1,19 +1,15 @@
|
||||
FROM git.modelhub.org.cn:9443/enginex-iluvatar/bi100-3.2.1-x86-ubuntu20.04-py3.10-poc-llm-infer:20250731115755
|
||||
FROM git.modelhub.org.cn:9443/enginex-iluvatar/bi100-3.2.3-x86-ubuntu20.04-py3.10-poc-llm-infer:v1.2.3
|
||||
|
||||
RUN pip install --no-cache-dir triton==2.1.0
|
||||
|
||||
COPY pkgs/triton /usr/local/corex/lib64/python3/dist-packages/triton
|
||||
COPY pkgs/triton-2.1.0+corex.4.1.2.dist-info /usr/local/corex/lib64/python3/dist-packages/triton-2.1.0+corex.4.1.2.dist-info
|
||||
COPY pkgs/xformers-0.0.22+corex.4.1.2.dist-info /usr/local/corex/lib64/python3/dist-packages/xformers-0.0.22+corex.4.1.2.dist-info
|
||||
COPY pkgs/xformers /usr/local/corex/lib64/python3/dist-packages/xformers
|
||||
|
||||
COPY paged_attn.py /usr/local/lib/python3.10/site-packages/vllm/attention/ops/paged_attn.py
|
||||
COPY __init__.py /usr/local/lib/python3.10/site-packages/vllm/triton_utils/__init__.py
|
||||
COPY prefix_prefill.py /usr/local/lib/python3.10/site-packages/vllm/attention/ops/prefix_prefill.py
|
||||
COPY paged_attn.py /usr/local/corex/lib64/python3/dist-packages/vllm/attention/ops/paged_attn.py
|
||||
COPY __init__.py /usr/local/corex/lib64/python3/dist-packages/vllm/triton_utils/__init__.py
|
||||
|
||||
RUN mkdir /workspace
|
||||
WORKDIR /workspace/
|
||||
|
||||
COPY ./launch_service /workspace/launch_service
|
||||
|
||||
ENTRYPOINT ["./launch_service"]
|
||||
|
||||
@@ -4,6 +4,7 @@ from typing import List, Optional, Tuple
|
||||
import torch
|
||||
|
||||
from vllm import _custom_ops as ops
|
||||
|
||||
from vllm.attention.ops.prefix_prefill import context_attention_fwd
|
||||
|
||||
# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`.
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1102,4 +1102,4 @@ for k, v in names_and_values.items():
|
||||
names_and_values_to_update[k] = hint_on_error(v)
|
||||
|
||||
names_and_values.update(names_and_values_to_update)
|
||||
del names_and_values_to_update, names_and_values, v, k, fn_type
|
||||
del names_and_values_to_update, names_and_values, v, k, fn_type
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -27,4 +27,4 @@ class ImageAsset:
|
||||
"""
|
||||
image_path = get_vllm_public_assets(filename=f"{self.name}.pt",
|
||||
s3_prefix=VLM_IMAGES_DIR)
|
||||
return torch.load(image_path)
|
||||
return torch.load(image_path, weights_only=True)
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -4,7 +4,10 @@ from typing import List, Optional, Tuple
|
||||
import torch
|
||||
|
||||
from vllm import _custom_ops as ops
|
||||
from vllm.attention.ops.prefix_prefill import context_attention_fwd
|
||||
from vllm.triton_utils import HAS_TRITON
|
||||
|
||||
if HAS_TRITON:
|
||||
from vllm.attention.ops.prefix_prefill import context_attention_fwd
|
||||
|
||||
# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`.
|
||||
_PARTITION_SIZE = 512
|
||||
|
||||
@@ -808,8 +808,6 @@ if triton.__version__ >= "2.1.0":
|
||||
)
|
||||
return
|
||||
|
||||
import time
|
||||
ts_beg = time.time()
|
||||
_fwd_kernel[grid](
|
||||
q,
|
||||
k,
|
||||
@@ -860,6 +858,4 @@ if triton.__version__ >= "2.1.0":
|
||||
num_warps=NUM_WARPS,
|
||||
num_stages=1,
|
||||
)
|
||||
elapsed = time.time() - ts_beg
|
||||
#print(f'{elapsed}: {BLOCK=}, {Lk=}, {Lk_padded=}, {BLOCK=}, {sliding_window=}, {NUM_WARPS=}')
|
||||
return
|
||||
|
||||
@@ -204,9 +204,6 @@ def which_attn_to_use(
|
||||
if selected_backend != _Backend.PALLAS:
|
||||
logger.info("Cannot use %s backend on TPU.", selected_backend)
|
||||
return _Backend.PALLAS
|
||||
|
||||
if selected_backend == _Backend.FLASH_ATTN:
|
||||
print("selected_backend == _Backend.FLASH_ATTN")
|
||||
|
||||
if is_hip():
|
||||
# AMD GPUs.
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -217,7 +217,8 @@ class MessageQueue:
|
||||
remote_subscribe_port = get_open_port()
|
||||
if is_valid_ipv6_address(connect_ip):
|
||||
self.remote_socket.setsockopt(IPV6, 1)
|
||||
socket_addr = f"tcp://*:{remote_subscribe_port}"
|
||||
connect_ip = f"[{connect_ip}]"
|
||||
socket_addr = f"tcp://{connect_ip}:{remote_subscribe_port}"
|
||||
self.remote_socket.bind(socket_addr)
|
||||
|
||||
else:
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user