Simplify frontend language (#9029)
This commit is contained in:
@@ -7,7 +7,7 @@ import time
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from sglang.api import set_default_backend
|
from sglang.lang.api import set_default_backend
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
add_common_sglang_args_and_parse,
|
add_common_sglang_args_and_parse,
|
||||||
dump_bench_raw_result,
|
dump_bench_raw_result,
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import time
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from sglang.api import set_default_backend
|
from sglang.lang.api import set_default_backend
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
add_common_sglang_args_and_parse,
|
add_common_sglang_args_and_parse,
|
||||||
select_sglang_backend,
|
select_sglang_backend,
|
||||||
|
|||||||
@@ -47,10 +47,10 @@ runtime_common = [
|
|||||||
"sentencepiece",
|
"sentencepiece",
|
||||||
"soundfile==0.13.1",
|
"soundfile==0.13.1",
|
||||||
"scipy",
|
"scipy",
|
||||||
"torchao==0.9.0",
|
|
||||||
"transformers==4.55.0",
|
|
||||||
"timm==1.0.16",
|
"timm==1.0.16",
|
||||||
"tiktoken",
|
"tiktoken",
|
||||||
|
"torchao==0.9.0",
|
||||||
|
"transformers==4.55.0",
|
||||||
"uvicorn",
|
"uvicorn",
|
||||||
"uvloop",
|
"uvloop",
|
||||||
"xgrammar==0.1.22",
|
"xgrammar==0.1.22",
|
||||||
@@ -84,6 +84,9 @@ srt_hip = [
|
|||||||
"petit_kernel==0.0.2",
|
"petit_kernel==0.0.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
|
||||||
|
srt_cpu = ["sglang[runtime_common]", "einops"]
|
||||||
|
|
||||||
# xpu is not enabled in public vllm and torch whl,
|
# xpu is not enabled in public vllm and torch whl,
|
||||||
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm
|
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.htmlinstall vllm
|
||||||
srt_xpu = ["sglang[runtime_common]"]
|
srt_xpu = ["sglang[runtime_common]"]
|
||||||
@@ -92,8 +95,6 @@ srt_xpu = ["sglang[runtime_common]"]
|
|||||||
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
|
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
|
||||||
srt_hpu = ["sglang[runtime_common]"]
|
srt_hpu = ["sglang[runtime_common]"]
|
||||||
|
|
||||||
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
|
|
||||||
srt_cpu = ["sglang[runtime_common]", "einops"]
|
|
||||||
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
|
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
|
||||||
srt_npu = ["sglang[runtime_common]"]
|
srt_npu = ["sglang[runtime_common]"]
|
||||||
|
|
||||||
@@ -112,12 +113,12 @@ test = [
|
|||||||
"sentence_transformers",
|
"sentence_transformers",
|
||||||
"pytest",
|
"pytest",
|
||||||
]
|
]
|
||||||
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[torch_memory_saver]", "sglang[decord]"]
|
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
|
||||||
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
|
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
|
||||||
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
|
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
|
||||||
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
|
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
|
||||||
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
|
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
|
||||||
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
|
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
|
||||||
|
|
||||||
dev = ["sglang[all]", "sglang[test]"]
|
dev = ["sglang[all]", "sglang[test]"]
|
||||||
dev_hip = ["sglang[all_hip]", "sglang[test]"]
|
dev_hip = ["sglang[all_hip]", "sglang[test]"]
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
- `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
|
- `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
|
||||||
- `test`: The test utilities.
|
- `test`: The test utilities.
|
||||||
- `api.py`: The public APIs.
|
- `api.py`: The public APIs.
|
||||||
- `bench_offline_throughput.py`: Benchmark the throughput in the offline mode.
|
- `bench_offline_throughput.py`: Benchmark the performance in the offline mode.
|
||||||
- `bench_one_batch.py`: Benchmark the latency of running a single static batch without a server.
|
- `bench_one_batch.py`: Benchmark the latency of running a single static batch without a server.
|
||||||
- `bench_one_batch_server.py`: Benchmark the latency of running a single batch with a server.
|
- `bench_one_batch_server.py`: Benchmark the latency of running a single batch with a server.
|
||||||
- `bench_serving.py`: Benchmark online serving with dynamic requests.
|
- `bench_serving.py`: Benchmark online serving with dynamic requests.
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
# SGLang public APIs
|
# SGLang public APIs
|
||||||
|
|
||||||
# Frontend Language APIs
|
# Frontend Language APIs
|
||||||
from sglang.api import (
|
from sglang.global_config import global_config
|
||||||
|
from sglang.lang.api import (
|
||||||
Engine,
|
Engine,
|
||||||
Runtime,
|
Runtime,
|
||||||
assistant,
|
assistant,
|
||||||
@@ -25,13 +26,13 @@ from sglang.api import (
|
|||||||
user_end,
|
user_end,
|
||||||
video,
|
video,
|
||||||
)
|
)
|
||||||
from sglang.global_config import global_config
|
|
||||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||||
from sglang.lang.choices import (
|
from sglang.lang.choices import (
|
||||||
greedy_token_selection,
|
greedy_token_selection,
|
||||||
token_length_normalized,
|
token_length_normalized,
|
||||||
unconditional_likelihood_normalized,
|
unconditional_likelihood_normalized,
|
||||||
)
|
)
|
||||||
|
from sglang.srt.entrypoints.engine import Engine
|
||||||
from sglang.utils import LazyImport
|
from sglang.utils import LazyImport
|
||||||
from sglang.version import __version__
|
from sglang.version import __version__
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ import time
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from sglang.api import set_default_backend
|
from sglang.lang.api import set_default_backend
|
||||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||||
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
|
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import time
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
import sglang as sgl
|
import sglang as sgl
|
||||||
from sglang.api import set_default_backend
|
from sglang.lang.api import set_default_backend
|
||||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||||
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
|
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user