adapt to sglang v0.5.2rc1 on dcu
This commit is contained in:
35
docker/compose.yaml
Normal file
35
docker/compose.yaml
Normal file
@@ -0,0 +1,35 @@
|
||||
services:
  sglang:
    image: lmsysorg/sglang:latest
    container_name: sglang
    volumes:
      - ${HOME}/.cache/huggingface:/root/.cache/huggingface
      # If you use modelscope, you need to mount this directory as well:
      # - ${HOME}/.cache/modelscope:/root/.cache/modelscope
    restart: always
    network_mode: host  # required by RDMA
    privileged: true  # required by RDMA
    # Or you can publish only port 30000 instead of host networking.
    # Port mappings are quoted so YAML never misreads them as numbers.
    # ports:
    #   - "30000:30000"
    environment:
      # Placeholder — inject the real token from the environment or a secret
      # store; quoted so tooling never re-types or reformats it.
      HF_TOKEN: "<secret>"
      # If you use modelscope to download the model, set this variable.
      # Compose env values should be strings, not YAML booleans:
      # SGLANG_USE_MODELSCOPE: "true"
    entrypoint: python3 -m sglang.launch_server
    # Folded block scalar (>-) makes the multi-line command explicitly one
    # logical line (the original relied on implicit plain-scalar folding).
    command: >-
      --model-path meta-llama/Llama-3.1-8B-Instruct
      --host 0.0.0.0
      --port 30000
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
      # Explicit timings instead of engine defaults; start_period gives the
      # server time to load the model before failures count.
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
Reference in New Issue
Block a user