upgrade flashinfer v0.2.2.post1 (#3934)
This commit is contained in:
@@ -143,4 +143,4 @@ sky status --endpoint 30000 sglang
|
||||
- [FlashInfer](https://github.com/flashinfer-ai/flashinfer) is the default attention kernel backend. It only supports sm75 and above. If you encounter any FlashInfer-related issues on sm75+ devices (e.g., T4, A10, A100, L4, L40S, H100), please switch to other kernels by adding `--attention-backend triton --sampling-backend pytorch` and open an issue on GitHub.
|
||||
- If you only need to use OpenAI models with the frontend language, you can avoid installing other dependencies by using `pip install "sglang[openai]"`.
|
||||
- The language frontend operates independently of the backend runtime. You can install the frontend locally without needing a GPU, while the backend can be set up on a GPU-enabled machine. To install the frontend, run `pip install sglang`; to install the backend, run `pip install "sglang[srt]"` (the quotes keep shells such as zsh from interpreting the square brackets). `srt` is short for SGLang Runtime.
|
||||
- To reinstall flashinfer locally, use the following command: `pip install "flashinfer-python>=0.2.1.post2" -i https://flashinfer.ai/whl/cu124/torch2.5 --force-reinstall --no-deps` and then delete the cache with `rm -rf ~/.cache/flashinfer`.
|
||||
- To reinstall flashinfer locally, use the following command: `pip install "flashinfer-python>=0.2.2.post1" -i https://flashinfer.ai/whl/cu124/torch2.5 --force-reinstall --no-deps` and then delete the cache with `rm -rf ~/.cache/flashinfer`.
|
||||
|
||||
@@ -43,7 +43,7 @@ runtime_common = [
|
||||
srt = [
|
||||
"sglang[runtime_common]",
|
||||
"sgl-kernel>=0.0.3.post6",
|
||||
"flashinfer_python>=0.2.1.post2",
|
||||
"flashinfer_python>=0.2.2.post1",
|
||||
"torch==2.5.1",
|
||||
"vllm>=0.6.4.post1,<=0.7.2",
|
||||
"cuda-python",
|
||||
|
||||
@@ -330,7 +330,7 @@ def _set_envs_and_config(server_args: ServerArgs):
|
||||
if server_args.attention_backend == "flashinfer":
|
||||
assert_pkg_version(
|
||||
"flashinfer_python",
|
||||
"0.2.1.post2",
|
||||
"0.2.2.post1",
|
||||
"Please uninstall the old version and "
|
||||
"reinstall the latest version by following the instructions "
|
||||
"at https://docs.flashinfer.ai/installation.html.",
|
||||
|
||||
@@ -16,7 +16,7 @@ pip install -e "python[all]" --find-links https://flashinfer.ai/whl/cu124/torch2
|
||||
|
||||
rm -rf /root/.cache/flashinfer
|
||||
# Force reinstall flashinfer and torch_memory_saver
|
||||
pip install flashinfer_python==0.2.1.post2 --find-links ${FLASHINFER_REPO} --force-reinstall --no-deps
|
||||
pip install flashinfer_python==0.2.2.post1 --find-links ${FLASHINFER_REPO} --force-reinstall --no-deps
|
||||
|
||||
pip install torch_memory_saver --force-reinstall
|
||||
|
||||
|
||||
Reference in New Issue
Block a user