[Feat] upgrade pytorch2.6 (#5417)
This commit is contained in:
2
.github/workflows/pr-test-sgl-kernel.yml
vendored
2
.github/workflows/pr-test-sgl-kernel.yml
vendored
@@ -88,7 +88,7 @@ jobs:
|
|||||||
- name: Install
|
- name: Install
|
||||||
run: |
|
run: |
|
||||||
bash scripts/ci_install_dependency.sh
|
bash scripts/ci_install_dependency.sh
|
||||||
pip3 install torch==2.5.1 && pip3 install pytest
|
pip3 install torch==2.6.0 && pip3 install pytest
|
||||||
pip3 uninstall sgl-kernel -y || true
|
pip3 uninstall sgl-kernel -y || true
|
||||||
pip3 install sgl-kernel/dist/*whl --force-reinstall --no-deps
|
pip3 install sgl-kernel/dist/*whl --force-reinstall --no-deps
|
||||||
pip3 list | grep sgl-kernel
|
pip3 list | grep sgl-kernel
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ Add [performance optimization options](#performance-optimization-options) as nee
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Installation
|
# Installation
|
||||||
pip install "sglang[all]>=0.4.3" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python
|
pip install "sglang[all]>=0.4.5.post2"
|
||||||
|
|
||||||
# Launch
|
# Launch
|
||||||
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code
|
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3 --tp 8 --trust-remote-code
|
||||||
|
|||||||
@@ -43,6 +43,6 @@ RUN python3 -m pip install --upgrade pip setuptools wheel html5lib six \
|
|||||||
fi \
|
fi \
|
||||||
&& python3 -m pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cu${CUINDEX} \
|
&& python3 -m pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cu${CUINDEX} \
|
||||||
&& cd sglang \
|
&& cd sglang \
|
||||||
&& python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --find-links https://flashinfer.ai/whl/cu${CUINDEX}/torch2.5/flashinfer-python
|
&& python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --find-links https://flashinfer.ai/whl/cu${CUINDEX}/torch2.6/flashinfer-python
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=interactive
|
ENV DEBIAN_FRONTEND=interactive
|
||||||
|
|||||||
@@ -164,4 +164,4 @@ sky status --endpoint 30000 sglang
|
|||||||
- [FlashInfer](https://github.com/flashinfer-ai/flashinfer) is the default attention kernel backend. It only supports sm75 and above. If you encounter any FlashInfer-related issues on sm75+ devices (e.g., T4, A10, A100, L4, L40S, H100), please switch to other kernels by adding `--attention-backend triton --sampling-backend pytorch` and open an issue on GitHub.
|
- [FlashInfer](https://github.com/flashinfer-ai/flashinfer) is the default attention kernel backend. It only supports sm75 and above. If you encounter any FlashInfer-related issues on sm75+ devices (e.g., T4, A10, A100, L4, L40S, H100), please switch to other kernels by adding `--attention-backend triton --sampling-backend pytorch` and open an issue on GitHub.
|
||||||
- If you only need to use OpenAI models with the frontend language, you can avoid installing other dependencies by using `pip install "sglang[openai]"`.
|
- If you only need to use OpenAI models with the frontend language, you can avoid installing other dependencies by using `pip install "sglang[openai]"`.
|
||||||
- The language frontend operates independently of the backend runtime. You can install the frontend locally without needing a GPU, while the backend can be set up on a GPU-enabled machine. To install the frontend, run `pip install sglang`, and for the backend, use `pip install "sglang[srt]"` (the quotes keep shells such as zsh from treating the brackets as a glob pattern). `srt` is an abbreviation for SGLang runtime.
|
- The language frontend operates independently of the backend runtime. You can install the frontend locally without needing a GPU, while the backend can be set up on a GPU-enabled machine. To install the frontend, run `pip install sglang`, and for the backend, use `pip install "sglang[srt]"` (the quotes keep shells such as zsh from treating the brackets as a glob pattern). `srt` is an abbreviation for SGLang runtime.
|
||||||
- To reinstall flashinfer locally, use the following command: `pip install "flashinfer-python>=0.2.3" -i https://flashinfer.ai/whl/cu124/torch2.5 --force-reinstall --no-deps` and then delete the cache with `rm -rf ~/.cache/flashinfer`.
|
- To reinstall flashinfer locally, use the following command: `pip install "flashinfer-python==0.2.3" -i https://flashinfer.ai/whl/cu124/torch2.6 --force-reinstall --no-deps` and then delete the cache with `rm -rf ~/.cache/flashinfer`.
|
||||||
|
|||||||
@@ -49,8 +49,8 @@ srt = [
|
|||||||
"sglang[runtime_common]",
|
"sglang[runtime_common]",
|
||||||
"sgl-kernel==0.0.9.post2",
|
"sgl-kernel==0.0.9.post2",
|
||||||
"flashinfer_python==0.2.3",
|
"flashinfer_python==0.2.3",
|
||||||
"torch==2.5.1",
|
"torch==2.6.0",
|
||||||
"torchvision==0.20.1",
|
"torchvision==0.21.0",
|
||||||
"cuda-python",
|
"cuda-python",
|
||||||
"outlines>=0.0.44,<=0.1.11",
|
"outlines>=0.0.44,<=0.1.11",
|
||||||
"partial_json_parser",
|
"partial_json_parser",
|
||||||
|
|||||||
@@ -143,7 +143,7 @@ def memcpy_triton_kernel(
|
|||||||
src_ptr,
|
src_ptr,
|
||||||
offset_ptr,
|
offset_ptr,
|
||||||
sz_ptr,
|
sz_ptr,
|
||||||
offset_src,
|
offset_src: tl.constexpr,
|
||||||
chunk_size, # multiplied for offset and sz
|
chunk_size, # multiplied for offset and sz
|
||||||
BLOCK_SIZE: tl.constexpr,
|
BLOCK_SIZE: tl.constexpr,
|
||||||
):
|
):
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ pip install -e "python[all]"
|
|||||||
|
|
||||||
# Install additional dependencies
|
# Install additional dependencies
|
||||||
pip install torch_memory_saver
|
pip install torch_memory_saver
|
||||||
pip install transformers==4.51.0 sentence_transformers accelerate==1.4.0 peft pandas datasets timm torchaudio
|
pip install transformers==4.51.0 sentence_transformers accelerate peft pandas datasets timm torchaudio
|
||||||
|
|
||||||
# For compiling xgrammar kernels
|
# For compiling xgrammar kernels
|
||||||
pip install cuda-python nvidia-cuda-nvrtc-cu12
|
pip install cuda-python nvidia-cuda-nvrtc-cu12
|
||||||
|
|||||||
Reference in New Issue
Block a user