# Docker Compose definition for a single SGLang inference server container.
---
services:
  sglang:
    # NOTE(review): `latest` is a moving tag; consider pinning a specific
    # image version for reproducible deployments.
    image: lmsysorg/sglang:latest
    container_name: sglang
    volumes:
      # Mount the host's Hugging Face cache into the container's cache path.
      - ${HOME}/.cache/huggingface:/root/.cache/huggingface
    restart: always
    # The container uses the host's network stack directly.
    network_mode: host
    # Alternatively, drop `network_mode: host` and publish only port 30000:
    # ports:
    #   - "30000:30000"
    environment:
      # Placeholder value: supply a real Hugging Face token here.
      # NOTE(review): prefer injecting it from the environment
      # (e.g. `${HF_TOKEN}`) over committing the token to version control.
      HF_TOKEN: <secret>
    entrypoint: python3 -m sglang.launch_server
    # Folded scalar: the lines below are joined with single spaces into one
    # argument string that is passed to the entrypoint.
    command: >-
      --model-path meta-llama/Meta-Llama-3.1-8B-Instruct
      --host 0.0.0.0
      --port 30000
    ulimits:
      # -1 means no limit on locked memory.
      memlock: -1
      # Stack size limit in bytes (64 MiB).
      stack: 67108864
    # The container uses the host's IPC namespace.
    ipc: host
    healthcheck:
      # Healthy when the /health endpoint on port 30000 answers successfully.
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            # Reserve the NVIDIA GPU with device id 0 for this service.
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]