Organize public APIs (#809)

This commit is contained in:
Liangsheng Yin
2024-07-29 15:34:16 -07:00
committed by GitHub
parent 084fa54d37
commit c8e9fed87a
10 changed files with 74 additions and 66 deletions

View File

@@ -22,46 +22,53 @@ from sglang.api import (
video,
)
# SGLang DSL APIs
__all__ = [
"Runtime",
"assistant",
"assistant_begin",
"assistant_end",
"flush_cache",
"function",
"gen",
"gen_int",
"gen_string",
"get_server_args",
"image",
"select",
"set_default_backend",
"system",
"system_begin",
"system_end",
"user",
"user_begin",
"user_end",
"video",
]
# Global Configurations
from sglang.global_config import global_config
__all__ += ["global_config"]
# SGL Backends
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.utils import LazyImport
from sglang.version import __version__
Anthropic = LazyImport("sglang.lang.backend.anthropic", "Anthropic")
LiteLLM = LazyImport("sglang.lang.backend.litellm", "LiteLLM")
OpenAI = LazyImport("sglang.lang.backend.openai", "OpenAI")
VertexAI = LazyImport("sglang.lang.backend.vertexai", "VertexAI")
__all__ += ["RuntimeEndpoint", "Anthropic", "LiteLLM", "OpenAI", "VertexAI"]
# public APIs management
__all__ = [
"global_config",
"Anthropic",
"LiteLLM",
"OpenAI",
"RuntimeEndpoint",
"VertexAI",
"function",
"Runtime",
"set_default_backend",
"flush_cache",
"get_server_args",
"gen",
"gen_int",
"gen_string",
"image",
"video",
"select",
"system",
"user",
"assistant",
"user_begin",
"user_end",
"assistant_begin",
"assistant_end",
"system_begin",
"system_end",
]
# Version
from sglang.version import __version__
__all__ += ["__version__"]
# Core Benchmarks
from sglang.benchmarks import bench_latency, bench_serving
__all__ += ["bench_latency", "bench_serving"]

View File

@@ -0,0 +1 @@
"""SGLang core benchmarks."""

View File

@@ -2,10 +2,10 @@
Benchmark the latency of a given model. It accepts arguments similar to those of launch_server.py.
# Usage (latency test):
python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --load-format dummy
python -m sglang.benchmarks.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --load-format dummy
# Usage (correctness test):
python -m sglang.bench_latency --model-path TinyLlama/TinyLlama-1.1B-Chat-v0.4 --correct
python -m sglang.benchmarks.bench_latency --model-path TinyLlama/TinyLlama-1.1B-Chat-v0.4 --correct
### Reference output:
prefill logits (first half) tensor([[-10.0312, -9.5000, 0.8936, ..., -4.9414, -3.2402, -3.3633],

View File

@@ -4,10 +4,10 @@
Benchmark online serving.
Usage:
python3 -m sglang.bench_serving --backend sglang --num-prompts 10
python3 -m sglang.benchmarks.bench_serving --backend sglang --num-prompts 10
python3 -m sglang.bench_serving --backend sglang --dataset-name random --num-prompts 3000 --random-input 1024 --random-output 1024 --random-range-ratio 0.5
python3 -m sglang.bench_serving --backend sglang --dataset-name random --request-rate-range 1,2,4,8,16,32 --random-input 4096 --random-output 1024 --random-range-ratio 0.125 --multi
python3 -m sglang.benchmarks.bench_serving --backend sglang --dataset-name random --num-prompts 3000 --random-input 1024 --random-output 1024 --random-range-ratio 0.5
python3 -m sglang.benchmarks.bench_serving --backend sglang --dataset-name random --request-rate-range 1,2,4,8,16,32 --random-input 4096 --random-output 1024 --random-range-ratio 0.125 --multi
"""
import argparse