Organize public APIs (#809)
This commit is contained in:
@@ -22,46 +22,53 @@ from sglang.api import (
|
||||
video,
|
||||
)
|
||||
|
||||
# SGLang DSL APIs
|
||||
__all__ = [
|
||||
"Runtime",
|
||||
"assistant",
|
||||
"assistant_begin",
|
||||
"assistant_end",
|
||||
"flush_cache",
|
||||
"function",
|
||||
"gen",
|
||||
"gen_int",
|
||||
"gen_string",
|
||||
"get_server_args",
|
||||
"image",
|
||||
"select",
|
||||
"set_default_backend",
|
||||
"system",
|
||||
"system_begin",
|
||||
"system_end",
|
||||
"user",
|
||||
"user_begin",
|
||||
"user_end",
|
||||
"video",
|
||||
]
|
||||
|
||||
|
||||
# Global Configurations
|
||||
from sglang.global_config import global_config
|
||||
|
||||
__all__ += ["global_config"]
|
||||
|
||||
# SGL Backends
|
||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.utils import LazyImport
|
||||
from sglang.version import __version__
|
||||
|
||||
Anthropic = LazyImport("sglang.lang.backend.anthropic", "Anthropic")
|
||||
LiteLLM = LazyImport("sglang.lang.backend.litellm", "LiteLLM")
|
||||
OpenAI = LazyImport("sglang.lang.backend.openai", "OpenAI")
|
||||
VertexAI = LazyImport("sglang.lang.backend.vertexai", "VertexAI")
|
||||
|
||||
__all__ += ["RuntimeEndpoint", "Anthropic", "LiteLLM", "OpenAI", "VertexAI"]
|
||||
|
||||
# public APIs management
|
||||
__all__ = [
|
||||
"global_config",
|
||||
"Anthropic",
|
||||
"LiteLLM",
|
||||
"OpenAI",
|
||||
"RuntimeEndpoint",
|
||||
"VertexAI",
|
||||
"function",
|
||||
"Runtime",
|
||||
"set_default_backend",
|
||||
"flush_cache",
|
||||
"get_server_args",
|
||||
"gen",
|
||||
"gen_int",
|
||||
"gen_string",
|
||||
"image",
|
||||
"video",
|
||||
"select",
|
||||
"system",
|
||||
"user",
|
||||
"assistant",
|
||||
"user_begin",
|
||||
"user_end",
|
||||
"assistant_begin",
|
||||
"assistant_end",
|
||||
"system_begin",
|
||||
"system_end",
|
||||
]
|
||||
# Version
|
||||
from sglang.version import __version__
|
||||
|
||||
__all__ += ["__version__"]
|
||||
|
||||
# Core Benchmarks
|
||||
from sglang.benchmarks import bench_latency, bench_serving
|
||||
|
||||
__all__ += ["bench_latency", "bench_serving"]
|
||||
|
||||
1
python/sglang/benchmarks/__init__.py
Normal file
1
python/sglang/benchmarks/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""SGLang core benchmarks."""
|
||||
@@ -2,10 +2,10 @@
|
||||
Benchmark the latency of a given model. It accepts arguments similar to those of launch_server.py.
|
||||
|
||||
# Usage (latency test):
|
||||
python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --load-format dummy
|
||||
python -m sglang.benchmarks.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --load-format dummy
|
||||
|
||||
# Usage (correctness test):
|
||||
python -m sglang.bench_latency --model-path TinyLlama/TinyLlama-1.1B-Chat-v0.4 --correct
|
||||
python -m sglang.benchmarks.bench_latency --model-path TinyLlama/TinyLlama-1.1B-Chat-v0.4 --correct
|
||||
|
||||
### Reference output:
|
||||
prefill logits (first half) tensor([[-10.0312, -9.5000, 0.8936, ..., -4.9414, -3.2402, -3.3633],
|
||||
@@ -4,10 +4,10 @@
|
||||
Benchmark online serving.
|
||||
|
||||
Usage:
|
||||
python3 -m sglang.bench_serving --backend sglang --num-prompt 10
|
||||
python3 -m sglang.benchmarks.bench_serving --backend sglang --num-prompt 10
|
||||
|
||||
python3 -m sglang.bench_serving --backend sglang --dataset-name random --num-prompts 3000 --random-input 1024 --random-output 1024 --random-range-ratio 0.5
|
||||
python3 -m sglang.bench_serving --backend sglang --dataset-name random --request-rate-range 1,2,4,8,16,32 --random-input 4096 --random-output 1024 --random-range-ratio 0.125 --multi
|
||||
python3 -m sglang.benchmarks.bench_serving --backend sglang --dataset-name random --num-prompts 3000 --random-input 1024 --random-output 1024 --random-range-ratio 0.5
|
||||
python3 -m sglang.benchmarks.bench_serving --backend sglang --dataset-name random --request-rate-range 1,2,4,8,16,32 --random-input 4096 --random-output 1024 --random-range-ratio 0.125 --multi
|
||||
"""
|
||||
|
||||
import argparse
|
||||
Reference in New Issue
Block a user