Update Readme (#660)
Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
This commit is contained in:
@@ -1,8 +1,7 @@
|
||||
# Code Structure
|
||||
|
||||
- `backend`: Various backends for the language interpreter.
|
||||
- `lang`: The frontend language.
|
||||
- `srt`: The serving engine for running local models. (SRT = SGLang Runtime).
|
||||
- `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
|
||||
- `test`: Test utilities.
|
||||
- `api.py`: Public API.
|
||||
- `bench_latency.py`: Benchmark utilities.
|
||||
|
||||
@@ -22,16 +22,16 @@ from sglang.api import (
|
||||
video,
|
||||
)
|
||||
|
||||
# SGL Backends
|
||||
from sglang.backend.anthropic import Anthropic
|
||||
from sglang.backend.litellm import LiteLLM
|
||||
from sglang.backend.openai import OpenAI
|
||||
from sglang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.backend.vertexai import VertexAI
|
||||
|
||||
# Global Configurations
|
||||
from sglang.global_config import global_config
|
||||
|
||||
# SGL Backends
|
||||
from sglang.lang.backend.anthropic import Anthropic
|
||||
from sglang.lang.backend.litellm import LiteLLM
|
||||
from sglang.lang.backend.openai import OpenAI
|
||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.lang.backend.vertexai import VertexAI
|
||||
|
||||
# public APIs management
|
||||
__all__ = [
|
||||
"global_config",
|
||||
|
||||
@@ -4,8 +4,8 @@ import os
|
||||
import re
|
||||
from typing import Callable, List, Optional, Union
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.ir import (
|
||||
SglExpr,
|
||||
SglExprList,
|
||||
|
||||
@@ -2,7 +2,7 @@ from typing import List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.chat_template import get_chat_template
|
||||
from sglang.lang.interpreter import StreamExecutor
|
||||
from sglang.lang.ir import SglSamplingParams
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import Mapping, Optional
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.chat_template import get_chat_template_by_model_path
|
||||
from sglang.lang.interpreter import StreamExecutor
|
||||
from sglang.lang.ir import SglSamplingParams
|
||||
@@ -6,7 +6,7 @@ from typing import Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.chat_template import ChatTemplate, get_chat_template_by_model_path
|
||||
from sglang.lang.interpreter import StreamExecutor
|
||||
from sglang.lang.ir import SglSamplingParams
|
||||
@@ -3,8 +3,8 @@ from typing import List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.chat_template import get_chat_template_by_model_path
|
||||
from sglang.lang.interpreter import StreamExecutor
|
||||
from sglang.lang.ir import SglSamplingParams
|
||||
@@ -2,7 +2,7 @@ import os
|
||||
import warnings
|
||||
from typing import Optional
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.chat_template import get_chat_template
|
||||
from sglang.lang.interpreter import StreamExecutor
|
||||
from sglang.lang.ir import SglSamplingParams
|
||||
@@ -3,8 +3,8 @@
|
||||
import uuid
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.interpreter import ProgramState, ProgramStateGroup
|
||||
from sglang.lang.ir import (
|
||||
SglArgument,
|
||||
|
||||
@@ -26,7 +26,7 @@ import uvloop
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import JSONResponse, Response, StreamingResponse
|
||||
|
||||
from sglang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.srt.constrained import disable_cache
|
||||
from sglang.srt.hf_transformers_utils import get_tokenizer
|
||||
from sglang.srt.managers.controller.manager_multi import (
|
||||
|
||||
@@ -166,6 +166,15 @@ class ServerArgs:
|
||||
"--quantization",
|
||||
type=str,
|
||||
default=ServerArgs.quantization,
|
||||
choices=[
|
||||
"awq",
|
||||
"fp8",
|
||||
"gptq",
|
||||
"marlin",
|
||||
"gptq_marlin",
|
||||
"squeezellm",
|
||||
"bitsandbytes",
|
||||
],
|
||||
help="The quantization method.",
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -243,13 +252,13 @@ class ServerArgs:
|
||||
parser.add_argument(
|
||||
"--show-time-cost",
|
||||
action="store_true",
|
||||
help="Show time cost of custom marks",
|
||||
help="Show time cost of custom marks.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--api-key",
|
||||
type=str,
|
||||
default=ServerArgs.api_key,
|
||||
help="Set API key of the server",
|
||||
help="Set API key of the server.",
|
||||
)
|
||||
|
||||
# Data parallelism
|
||||
@@ -285,17 +294,17 @@ class ServerArgs:
|
||||
parser.add_argument(
|
||||
"--disable-flashinfer",
|
||||
action="store_true",
|
||||
help="Disable flashinfer inference kernels",
|
||||
help="Disable flashinfer inference kernels.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable-radix-cache",
|
||||
action="store_true",
|
||||
help="Disable RadixAttention",
|
||||
help="Disable RadixAttention for prefix caching.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable-regex-jump-forward",
|
||||
action="store_true",
|
||||
help="Disable regex jump-forward",
|
||||
help="Disable regex jump-forward.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable-cuda-graph",
|
||||
|
||||
@@ -306,7 +306,7 @@ def test_image_qa():
|
||||
assert (
|
||||
"taxi" in state.messages()[-1]["content"]
|
||||
or "car" in state.messages()[-1]["content"]
|
||||
)
|
||||
), f"{state.messages()[-1]['content']}"
|
||||
|
||||
|
||||
def test_stream():
|
||||
|
||||
@@ -6,9 +6,9 @@ from functools import partial
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
from sglang.backend.openai import OpenAI
|
||||
from sglang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.global_config import global_config
|
||||
from sglang.lang.backend.openai import OpenAI
|
||||
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
from sglang.utils import get_exception_traceback
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user