diff --git a/python/sglang/eval/loogle_eval.py b/python/sglang/eval/loogle_eval.py
index 250f47a7b..22fc70541 100644
--- a/python/sglang/eval/loogle_eval.py
+++ b/python/sglang/eval/loogle_eval.py
@@ -142,7 +142,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--model",
         default="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
-        help="Model name or ID",
+        help="Model name or ID, only used for model name",
     )
     parser.add_argument(
         "--max-concurrency", type=int, default=144, help="Maximum concurrent requests"
diff --git a/scripts/ci_install_dependency.sh b/scripts/ci_install_dependency.sh
index 9bd292a01..ae99fe4e0 100755
--- a/scripts/ci_install_dependency.sh
+++ b/scripts/ci_install_dependency.sh
@@ -31,3 +31,6 @@ pip install -e lmms-eval/
 
 # Install FlashMLA for attention backend tests
 pip install git+https://github.com/deepseek-ai/FlashMLA.git
+
+# Install hf_xet
+pip install "huggingface_hub[hf_xet]"
diff --git a/scripts/ci_install_dependency_8_gpu.sh b/scripts/ci_install_dependency_8_gpu.sh
index 6bd28c4c5..3167ff403 100755
--- a/scripts/ci_install_dependency_8_gpu.sh
+++ b/scripts/ci_install_dependency_8_gpu.sh
@@ -123,3 +123,6 @@ gdrcopy_copybw
 echo "=== Verify NVSHMEM ==="
 nvshmem-info -a
 # /opt/nvshmem/bin/perftest/device/pt-to-pt/shmem_put_bw
+
+# Install hf_xet
+pip install "huggingface_hub[hf_xet]"