Simplify frontend language (#9029)
This commit is contained in:
@@ -7,7 +7,7 @@ import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sglang.api import set_default_backend
|
||||
from sglang.lang.api import set_default_backend
|
||||
from sglang.test.test_utils import (
|
||||
add_common_sglang_args_and_parse,
|
||||
dump_bench_raw_result,
|
||||
|
||||
@@ -5,7 +5,7 @@ import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sglang.api import set_default_backend
|
||||
from sglang.lang.api import set_default_backend
|
||||
from sglang.test.test_utils import (
|
||||
add_common_sglang_args_and_parse,
|
||||
select_sglang_backend,
|
||||
|
||||
@@ -47,10 +47,10 @@ runtime_common = [
|
||||
"sentencepiece",
|
||||
"soundfile==0.13.1",
|
||||
"scipy",
|
||||
"torchao==0.9.0",
|
||||
"transformers==4.55.0",
|
||||
"timm==1.0.16",
|
||||
"tiktoken",
|
||||
"torchao==0.9.0",
|
||||
"transformers==4.55.0",
|
||||
"uvicorn",
|
||||
"uvloop",
|
||||
"xgrammar==0.1.22",
|
||||
@@ -84,6 +84,9 @@ srt_hip = [
|
||||
"petit_kernel==0.0.2",
|
||||
]
|
||||
|
||||
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
|
||||
srt_cpu = ["sglang[runtime_common]", "einops"]
|
||||
|
||||
# XPU is not enabled in the public vllm and torch wheels,
|
||||
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
|
||||
srt_xpu = ["sglang[runtime_common]"]
|
||||
@@ -92,8 +95,6 @@ srt_xpu = ["sglang[runtime_common]"]
|
||||
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
|
||||
srt_hpu = ["sglang[runtime_common]"]
|
||||
|
||||
# CPU: torch wheel for CPU needs to be installed from https://download.pytorch.org/whl/cpu
|
||||
srt_cpu = ["sglang[runtime_common]", "einops"]
|
||||
# https://vllm-ascend.readthedocs.io/en/latest/installation.html
|
||||
srt_npu = ["sglang[runtime_common]"]
|
||||
|
||||
@@ -112,12 +113,12 @@ test = [
|
||||
"sentence_transformers",
|
||||
"pytest",
|
||||
]
|
||||
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[torch_memory_saver]", "sglang[decord]"]
|
||||
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
|
||||
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
|
||||
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
|
||||
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
|
||||
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]", "sglang[decord]"]
|
||||
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[torch_memory_saver]", "sglang[decord]"]
|
||||
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
|
||||
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
|
||||
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
|
||||
all_cpu = ["sglang[srt_cpu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
|
||||
all_npu = ["sglang[srt_npu]", "sglang[openai]", "sglang[anthropic]", "sglang[decord]"]
|
||||
|
||||
dev = ["sglang[all]", "sglang[test]"]
|
||||
dev_hip = ["sglang[all_hip]", "sglang[test]"]
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
- `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
|
||||
- `test`: The test utilities.
|
||||
- `api.py`: The public APIs.
|
||||
- `bench_offline_throughput.py`: Benchmark the throughput in the offline mode.
|
||||
- `bench_offline_throughput.py`: Benchmark the performance in the offline mode.
|
||||
- `bench_one_batch.py`: Benchmark the latency of running a single static batch without a server.
|
||||
- `bench_one_batch_server.py`: Benchmark the latency of running a single batch with a server.
|
||||
- `bench_serving.py`: Benchmark online serving with dynamic requests.
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
# SGLang public APIs
|
||||
|
||||
# Frontend Language APIs
|
||||
from sglang.api import (
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.api import (
|
||||
Engine,
|
||||
Runtime,
|
||||
assistant,
|
||||
@@ -25,13 +26,13 @@ from sglang.api import (
|
||||
user_end,
|
||||
video,
|
||||
)
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.lang.choices import (
|
||||
greedy_token_selection,
|
||||
token_length_normalized,
|
||||
unconditional_likelihood_normalized,
|
||||
)
|
||||
from sglang.srt.entrypoints.engine import Engine
|
||||
from sglang.utils import LazyImport
|
||||
from sglang.version import __version__
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sglang.api import set_default_backend
|
||||
from sglang.lang.api import set_default_backend
|
||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import time
|
||||
import numpy as np
|
||||
|
||||
import sglang as sgl
|
||||
from sglang.api import set_default_backend
|
||||
from sglang.lang.api import set_default_backend
|
||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
|
||||
|
||||
|
||||
Reference in New Issue
Block a user