Improve doc strings (#518)
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
"""Some Public API Definitions"""
|
||||
"""Public APIs of the language."""
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
"""Launch the inference server."""
|
||||
|
||||
import argparse
|
||||
|
||||
from sglang.srt.server import ServerArgs, launch_server
|
||||
@@ -8,4 +10,4 @@ if __name__ == "__main__":
|
||||
args = parser.parse_args()
|
||||
server_args = ServerArgs.from_cli_args(args)
|
||||
|
||||
launch_server(server_args, None)
|
||||
launch_server(server_args, None)
|
||||
@@ -1,3 +1,4 @@
|
||||
"""Launch the inference server for Llava-video model."""
|
||||
import argparse
|
||||
import multiprocessing as mp
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"""Cache for the compressed finite state machine."""
|
||||
from sglang.srt.constrained import RegexFSM, TransformerTokenizer
|
||||
from sglang.srt.constrained.base_cache import BaseCache
|
||||
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
"""
|
||||
Faster constrained decoding.
|
||||
Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
|
||||
"""
|
||||
import interegular
|
||||
|
||||
from sglang.srt.constrained import FSMInfo, disk_cache, make_deterministic_fsm
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"""Conversation templates."""
|
||||
# Adapted from
|
||||
# https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
|
||||
import dataclasses
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
"""
|
||||
Flush the KV cache.
|
||||
|
||||
Usage:
|
||||
python3 -m sglang.srt.flush_cache --url http://localhost:30000
|
||||
"""
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"""Logits processing."""
|
||||
import torch
|
||||
from torch import nn
|
||||
from vllm.distributed import (
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"""Radix attention."""
|
||||
import torch
|
||||
import numpy as np
|
||||
from torch import nn
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""A data parallel worker thread."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import queue
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""Metadata for requests and batches."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import IntEnum, auto
|
||||
from typing import List
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"""ModelRunner runs the forward passes of the models."""
|
||||
import importlib
|
||||
import importlib.resources
|
||||
import logging
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
"""
|
||||
The radix tree data structure for managing the KV cache.
|
||||
"""
|
||||
import heapq
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"""Request scheduler heuristic."""
|
||||
import random
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
"""A tensor parallel worker."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"""DetokenizerManager is a process that detokenizes the token ids."""
|
||||
import asyncio
|
||||
import inspect
|
||||
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
"""
|
||||
The definition of objects transferred between different
|
||||
processes (TokenizerManager, DetokenizerManager, Controller).
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"""TokenizerManager is a process that tokenizes the text."""
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import dataclasses
|
||||
@@ -283,7 +284,7 @@ class TokenizerManager:
|
||||
req = AbortReq(rid)
|
||||
self.send_to_router.send_pyobj(req)
|
||||
|
||||
def create_abort_task(self, obj):
|
||||
def create_abort_task(self, obj: GenerateReqInput):
|
||||
# Abort the request if the client is disconnected.
|
||||
async def abort_request():
|
||||
await asyncio.sleep(3)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""pydantic models for OpenAI API protocol"""
|
||||
"""Pydantic models for OpenAI API protocol"""
|
||||
|
||||
import time
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
"""SRT: SGLang Runtime"""
|
||||
"""
|
||||
The entry point of the inference server.
|
||||
SRT = SGLang Runtime.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
@@ -10,7 +13,7 @@ import sys
|
||||
import threading
|
||||
import time
|
||||
from http import HTTPStatus
|
||||
from typing import Optional
|
||||
from typing import Optional, Dict
|
||||
|
||||
# Fix a bug of Python threading
|
||||
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
|
||||
@@ -148,7 +151,6 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
|
||||
server_args.dp_size,
|
||||
)
|
||||
|
||||
# Init local models port args
|
||||
ports = server_args.additional_ports
|
||||
tp = server_args.tp_size
|
||||
model_port_args = []
|
||||
@@ -269,6 +271,12 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
|
||||
|
||||
|
||||
class Runtime:
|
||||
"""
|
||||
A wrapper for the server.
|
||||
This is used for launching the server in a python program without
|
||||
using the command-line interface.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
log_level: str = "error",
|
||||
@@ -339,7 +347,7 @@ class Runtime:
|
||||
async def add_request(
|
||||
self,
|
||||
prompt: str,
|
||||
sampling_params,
|
||||
sampling_params: Dict,
|
||||
):
|
||||
json_data = {
|
||||
"text": prompt,
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
"""
|
||||
This file contains the SGL programs used for unit testing.
|
||||
"""
|
||||
"""This file contains the SGL programs used for unit testing."""
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
Reference in New Issue
Block a user