sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct

2025-09-13 17:00:20 +08:00
commit 118f1fc726
2037 changed files with 515371 additions and 0 deletions
--- a/scripts/playground/launch_tgi.sh
+++ b/scripts/playground/launch_tgi.sh
@@ -0,0 +1,7 @@
+# Assuming the model is downdloaded at /home/ubuntu/model_weights/Llama-2-7b-chat-hf
+docker run --name tgi --rm -ti --gpus all --network host \
+  -v /home/ubuntu/model_weights/Llama-2-7b-chat-hf:/Llama-2-7b-chat-hf \
+  ghcr.io/huggingface/text-generation-inference:1.1.0 \
+  --model-id /Llama-2-7b-chat-hf --num-shard 1  --trust-remote-code \
+  --max-input-length 2048 --max-total-tokens 4096 \
+  --port 24000