diff --git a/.github/workflows/pr-test-pd-router.yml b/.github/workflows/pr-test-pd-router.yml index 95278aed8..15ddf0460 100644 --- a/.github/workflows/pr-test-pd-router.yml +++ b/.github/workflows/pr-test-pd-router.yml @@ -138,7 +138,7 @@ jobs: run: | echo "Installing SGLang with all extras..." python3 -m pip --no-cache-dir install --upgrade pip - python3 -m pip --no-cache-dir install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126 + python3 -m pip --no-cache-dir install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128 python3 -m pip --no-cache-dir install -e "python[all]" --break-system-packages python3 -m pip --no-cache-dir install mooncake-transfer-engine==0.3.6.post1 python3 -m pip --no-cache-dir install --user --force-reinstall genai-bench==0.0.2 diff --git a/python/sglang/srt/speculative/spec_utils.py b/python/sglang/srt/speculative/spec_utils.py index 1bfa774bc..714da8573 100644 --- a/python/sglang/srt/speculative/spec_utils.py +++ b/python/sglang/srt/speculative/spec_utils.py @@ -15,7 +15,7 @@ from sglang.srt.managers.schedule_batch import Req from sglang.srt.utils import is_cuda, is_hip if is_cuda(): - from sgl_kernel import fast_topk, tree_speculative_sampling_target_only + from sgl_kernel import fast_topk elif is_hip(): from sgl_kernel import fast_topk @@ -30,8 +30,7 @@ SIMULATE_ACC_LEN = envs.SGLANG_SIMULATE_ACC_LEN.get() # turn off if < 0 SIMULATE_ACC_METHOD = envs.SGLANG_SIMULATE_ACC_METHOD.get() TREE_TRAVERSE_TIME_THRESHOLD = 1 # TODO: set this properly - -TREE_SPEC_KERNEL_AVAILABLE = "tree_speculative_sampling_target_only" in globals() +TREE_SPEC_KERNEL_AVAILABLE = is_cuda() # This kernel is only available for CUDA now @triton.jit diff --git a/scripts/ci/ci_install_dependency.sh b/scripts/ci/ci_install_dependency.sh index 61ea15565..b3502e342 100755 --- a/scripts/ci/ci_install_dependency.sh +++ b/scripts/ci/ci_install_dependency.sh @@ -24,7 +24,7 @@ if [ "$IS_BLACKWELL" = "1" ]; then PIP_INSTALL_SUFFIX="--break-system-packages" # Clean up existing installations - $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm torch $PIP_INSTALL_SUFFIX || true + $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm torch torchaudio $PIP_INSTALL_SUFFIX || true else # In normal cases, we use uv, which is much faster than pip. pip install --upgrade pip @@ -35,7 +35,7 @@ else PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match" # Clean up existing installations - $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm torch || true + $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm torch torchaudio || true fi # Install the main package