Make sglang compat with vllm 0.5.1 (#598)
This commit is contained in:
@@ -53,7 +53,11 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
|
|||||||
The docker images are available on Docker Hub as [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).
|
The docker images are available on Docker Hub as [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).
|
||||||
|
|
||||||
### Common Notes
|
### Common Notes
|
||||||
- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html).
|
- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html) by running:
|
||||||
|
```
|
||||||
|
pip uninstall -y triton triton-nightly
|
||||||
|
pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly
|
||||||
|
```
|
||||||
- If you cannot install FlashInfer, check out its [installation](https://docs.flashinfer.ai/installation.html#) page. If you still cannot install it, you can use the slower Triton kernels by adding `--disable-flashinfer` when launching the server.
|
- If you cannot install FlashInfer, check out its [installation](https://docs.flashinfer.ai/installation.html#) page. If you still cannot install it, you can use the slower Triton kernels by adding `--disable-flashinfer` when launching the server.
|
||||||
- If you only need to use the OpenAI backend, you can avoid installing other dependencies by using `pip install "sglang[openai]"`.
|
- If you only need to use the OpenAI backend, you can avoid installing other dependencies by using `pip install "sglang[openai]"`.
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ dependencies = [
|
|||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
srt = ["aiohttp", "fastapi", "hf_transfer", "huggingface_hub", "interegular", "packaging", "pillow",
|
srt = ["aiohttp", "fastapi", "hf_transfer", "huggingface_hub", "interegular", "packaging", "pillow",
|
||||||
"psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.0", "outlines>=0.0.44"]
|
"psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.1", "outlines>=0.0.44"]
|
||||||
openai = ["openai>=1.0", "tiktoken"]
|
openai = ["openai>=1.0", "tiktoken"]
|
||||||
anthropic = ["anthropic>=0.20.0"]
|
anthropic = ["anthropic>=0.20.0"]
|
||||||
litellm = ["litellm>=1.0.0"]
|
litellm = ["litellm>=1.0.0"]
|
||||||
|
|||||||
@@ -326,7 +326,7 @@ class ModelRunner:
|
|||||||
device_config=device_config,
|
device_config=device_config,
|
||||||
load_config=load_config,
|
load_config=load_config,
|
||||||
lora_config=None,
|
lora_config=None,
|
||||||
vision_language_config=None,
|
multimodal_config=None,
|
||||||
parallel_config=None,
|
parallel_config=None,
|
||||||
scheduler_config=None,
|
scheduler_config=None,
|
||||||
cache_config=None,
|
cache_config=None,
|
||||||
|
|||||||
@@ -476,7 +476,7 @@ def monkey_patch_vllm_dummy_weight_loader():
|
|||||||
ModelConfig,
|
ModelConfig,
|
||||||
ParallelConfig,
|
ParallelConfig,
|
||||||
SchedulerConfig,
|
SchedulerConfig,
|
||||||
VisionLanguageConfig,
|
MultiModalConfig,
|
||||||
_initialize_model,
|
_initialize_model,
|
||||||
initialize_dummy_weights,
|
initialize_dummy_weights,
|
||||||
nn,
|
nn,
|
||||||
@@ -489,7 +489,7 @@ def monkey_patch_vllm_dummy_weight_loader():
|
|||||||
model_config: ModelConfig,
|
model_config: ModelConfig,
|
||||||
device_config: DeviceConfig,
|
device_config: DeviceConfig,
|
||||||
lora_config: Optional[LoRAConfig],
|
lora_config: Optional[LoRAConfig],
|
||||||
vision_language_config: Optional[VisionLanguageConfig],
|
multimodal_config: Optional[MultiModalConfig],
|
||||||
parallel_config: ParallelConfig,
|
parallel_config: ParallelConfig,
|
||||||
scheduler_config: SchedulerConfig,
|
scheduler_config: SchedulerConfig,
|
||||||
cache_config: CacheConfig,
|
cache_config: CacheConfig,
|
||||||
@@ -500,7 +500,7 @@ def monkey_patch_vllm_dummy_weight_loader():
|
|||||||
model_config,
|
model_config,
|
||||||
self.load_config,
|
self.load_config,
|
||||||
lora_config,
|
lora_config,
|
||||||
vision_language_config,
|
multimodal_config,
|
||||||
cache_config,
|
cache_config,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user