feat: add benchmark serving (#657)
This commit is contained in:
@@ -7,6 +7,23 @@ from pydantic import BaseModel, Field
|
||||
from typing_extensions import Literal
|
||||
|
||||
|
||||
class ModelCard(BaseModel):
    """Metadata record describing a single model.

    Only ``id`` is required; every other field falls back to a default.
    """

    # Identifier of the model (required, no default).
    id: str
    # Object-type tag; defaults to the literal "model".
    object: str = "model"
    # Whole-second Unix timestamp, sampled via time.time() each time an
    # instance is created without an explicit value.
    created: int = Field(
        default_factory=lambda: int(time.time()),
    )
    # Owner label; defaults to "sglang".
    owned_by: str = "sglang"
    # Optional root identifier; None unless supplied by the caller.
    root: Optional[str] = None
|
||||
|
||||
|
||||
class ModelList(BaseModel):
    """Container holding a collection of ``ModelCard`` entries."""

    # Object-type tag; defaults to the literal "list".
    object: str = "list"
    # The model cards in this listing; empty when none are given.
    data: List[ModelCard] = []
|
||||
|
||||
|
||||
class ErrorResponse(BaseModel):
|
||||
object: str = "error"
|
||||
message: str
|
||||
|
||||
@@ -44,6 +44,7 @@ from sglang.srt.openai_api_adapter import (
|
||||
v1_chat_completions,
|
||||
v1_completions,
|
||||
)
|
||||
from sglang.srt.openai_protocol import ModelCard, ModelList
|
||||
from sglang.srt.server_args import PortArgs, ServerArgs
|
||||
from sglang.srt.utils import (
|
||||
API_KEY_HEADER_NAME,
|
||||
@@ -73,6 +74,21 @@ async def health() -> Response:
|
||||
return Response(status_code=200)
|
||||
|
||||
|
||||
def get_model_list():
    """Return the names of the available models.

    The result is a one-element list containing the ``model_path`` of the
    module-level ``tokenizer_manager``.
    """
    return [tokenizer_manager.model_path]
|
||||
|
||||
|
||||
@app.get("/v1/models")
def available_models():
    """Serve ``GET /v1/models`` by listing the available models.

    Each name returned by ``get_model_list()`` becomes a ``ModelCard`` whose
    ``id`` and ``root`` are both that name; the cards are returned wrapped
    in a ``ModelList``.
    """
    return ModelList(
        data=[ModelCard(id=name, root=name) for name in get_model_list()]
    )
|
||||
|
||||
|
||||
@app.get("/get_model_info")
|
||||
async def get_model_info():
|
||||
result = {
|
||||
|
||||
Reference in New Issue
Block a user