Update Readme (#660)

Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
This commit is contained in:
Ying Sheng
2024-07-19 09:54:01 -07:00
committed by GitHub
parent dc4e4a6acc
commit 51fda1439f
25 changed files with 200 additions and 185 deletions

View File

@@ -1,8 +1,7 @@
# Code Structure
- `backend`: Various backends for the language interpreter.
- `lang`: The frontend language.
- `srt`: The serving engine for running local models. (SRT = SGLang Runtime).
- `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
- `test`: Test utilities.
- `api.py`: Public API.
- `bench_latency.py`: Benchmark utilities.

View File

@@ -22,16 +22,16 @@ from sglang.api import (
video,
)
# SGL Backends
from sglang.backend.anthropic import Anthropic
from sglang.backend.litellm import LiteLLM
from sglang.backend.openai import OpenAI
from sglang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.backend.vertexai import VertexAI
# Global Configurations
from sglang.global_config import global_config
# SGL Backends
from sglang.lang.backend.anthropic import Anthropic
from sglang.lang.backend.litellm import LiteLLM
from sglang.lang.backend.openai import OpenAI
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.lang.backend.vertexai import VertexAI
# Public API management
__all__ = [
"global_config",

View File

@@ -4,8 +4,8 @@ import os
import re
from typing import Callable, List, Optional, Union
from sglang.backend.base_backend import BaseBackend
from sglang.global_config import global_config
from sglang.lang.backend.base_backend import BaseBackend
from sglang.lang.ir import (
SglExpr,
SglExprList,

View File

@@ -2,7 +2,7 @@ from typing import List, Optional, Union
import numpy as np
from sglang.backend.base_backend import BaseBackend
from sglang.lang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template
from sglang.lang.interpreter import StreamExecutor
from sglang.lang.ir import SglSamplingParams

View File

@@ -1,6 +1,6 @@
from typing import Mapping, Optional
from sglang.backend.base_backend import BaseBackend
from sglang.lang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template_by_model_path
from sglang.lang.interpreter import StreamExecutor
from sglang.lang.ir import SglSamplingParams

View File

@@ -6,7 +6,7 @@ from typing import Callable, List, Optional, Union
import numpy as np
from sglang.backend.base_backend import BaseBackend
from sglang.lang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import ChatTemplate, get_chat_template_by_model_path
from sglang.lang.interpreter import StreamExecutor
from sglang.lang.ir import SglSamplingParams

View File

@@ -3,8 +3,8 @@ from typing import List, Optional
import numpy as np
from sglang.backend.base_backend import BaseBackend
from sglang.global_config import global_config
from sglang.lang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template_by_model_path
from sglang.lang.interpreter import StreamExecutor
from sglang.lang.ir import SglSamplingParams

View File

@@ -2,7 +2,7 @@ import os
import warnings
from typing import Optional
from sglang.backend.base_backend import BaseBackend
from sglang.lang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template
from sglang.lang.interpreter import StreamExecutor
from sglang.lang.ir import SglSamplingParams

View File

@@ -3,8 +3,8 @@
import uuid
from typing import Any, Callable, Dict, List, Optional, Union
from sglang.backend.base_backend import BaseBackend
from sglang.global_config import global_config
from sglang.lang.backend.base_backend import BaseBackend
from sglang.lang.interpreter import ProgramState, ProgramStateGroup
from sglang.lang.ir import (
SglArgument,

View File

@@ -26,7 +26,7 @@ import uvloop
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse
from sglang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.srt.constrained import disable_cache
from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.managers.controller.manager_multi import (

View File

@@ -166,6 +166,15 @@ class ServerArgs:
"--quantization",
type=str,
default=ServerArgs.quantization,
choices=[
"awq",
"fp8",
"gptq",
"marlin",
"gptq_marlin",
"squeezellm",
"bitsandbytes",
],
help="The quantization method.",
)
parser.add_argument(
@@ -243,13 +252,13 @@ class ServerArgs:
parser.add_argument(
"--show-time-cost",
action="store_true",
help="Show time cost of custom marks",
help="Show time cost of custom marks.",
)
parser.add_argument(
"--api-key",
type=str,
default=ServerArgs.api_key,
help="Set API key of the server",
help="Set API key of the server.",
)
# Data parallelism
@@ -285,17 +294,17 @@ class ServerArgs:
parser.add_argument(
"--disable-flashinfer",
action="store_true",
help="Disable flashinfer inference kernels",
help="Disable flashinfer inference kernels.",
)
parser.add_argument(
"--disable-radix-cache",
action="store_true",
help="Disable RadixAttention",
help="Disable RadixAttention for prefix caching.",
)
parser.add_argument(
"--disable-regex-jump-forward",
action="store_true",
help="Disable regex jump-forward",
help="Disable regex jump-forward.",
)
parser.add_argument(
"--disable-cuda-graph",

View File

@@ -306,7 +306,7 @@ def test_image_qa():
assert (
"taxi" in state.messages()[-1]["content"]
or "car" in state.messages()[-1]["content"]
)
), f"{state.messages()[-1]['content']}"
def test_stream():

View File

@@ -6,9 +6,9 @@ from functools import partial
import numpy as np
import requests
from sglang.backend.openai import OpenAI
from sglang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.global_config import global_config
from sglang.lang.backend.openai import OpenAI
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.utils import get_exception_traceback