adapt to sglang v0.5.2rc1 on dcu
This commit is contained in:
35
docker/compose.yaml
Normal file
35
docker/compose.yaml
Normal file
@@ -0,0 +1,35 @@
|
||||
services:
  sglang:
    image: lmsysorg/sglang:latest
    container_name: sglang
    volumes:
      - ${HOME}/.cache/huggingface:/root/.cache/huggingface
      # If you use modelscope, you need to mount this directory as well:
      # - ${HOME}/.cache/modelscope:/root/.cache/modelscope
    restart: always
    network_mode: host  # required by RDMA
    privileged: true  # required by RDMA
    # Or you can publish only port 30000 instead of host networking.
    # Port mappings are quoted so YAML never misreads them as numbers.
    # ports:
    #   - "30000:30000"
    environment:
      # Placeholder — inject the real token from the environment or a secret
      # store; quoted so tooling never re-types or reformats it.
      HF_TOKEN: "<secret>"
      # If you use modelscope to download the model, set this variable.
      # Compose env values should be strings, not YAML booleans:
      # SGLANG_USE_MODELSCOPE: "true"
    entrypoint: python3 -m sglang.launch_server
    # Folded block scalar (>-) makes the multi-line command explicitly one
    # logical line (the original relied on implicit plain-scalar folding).
    command: >-
      --model-path meta-llama/Llama-3.1-8B-Instruct
      --host 0.0.0.0
      --port 30000
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
      # Explicit timings instead of engine defaults; start_period gives the
      # server time to load the model before failures count.
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
Reference in New Issue
Block a user