Improve doc strings (#518)

This commit is contained in:
Lianmin Zheng
2024-06-08 02:06:52 -07:00
parent e8a2327d52
commit f6dbd24043
22 changed files with 52 additions and 17 deletions

View File

@@ -1,4 +1,4 @@
"""Some Public API Definitions"""
"""Public APIs of the language."""
import os
import re

View File

@@ -1,3 +1,5 @@
"""Launch the inference server."""
import argparse
from sglang.srt.server import ServerArgs, launch_server
@@ -8,4 +10,4 @@ if __name__ == "__main__":
args = parser.parse_args()
server_args = ServerArgs.from_cli_args(args)
launch_server(server_args, None)
launch_server(server_args, None)

View File

@@ -1,3 +1,4 @@
"""Launch the inference server for Llava-video model."""
import argparse
import multiprocessing as mp

View File

@@ -1,3 +1,4 @@
"""Cache for the compressed finite state machine."""
from sglang.srt.constrained import RegexFSM, TransformerTokenizer
from sglang.srt.constrained.base_cache import BaseCache

View File

@@ -1,3 +1,7 @@
"""
Faster constrained decoding.
Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
"""
import interegular
from sglang.srt.constrained import FSMInfo, disk_cache, make_deterministic_fsm

View File

@@ -1,3 +1,4 @@
"""Conversation templates."""
# Adapted from
# https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
import dataclasses

View File

@@ -1,4 +1,6 @@
"""
Flush the KV cache.
Usage:
python3 -m sglang.srt.flush_cache --url http://localhost:30000
"""

View File

@@ -1,3 +1,4 @@
"""Logits processing."""
import torch
from torch import nn
from vllm.distributed import (

View File

@@ -1,3 +1,4 @@
"""Radix attention."""
import torch
import numpy as np
from torch import nn

View File

@@ -1,4 +1,5 @@
"""A data parallel worker thread."""
import asyncio
import logging
import queue

View File

@@ -1,4 +1,5 @@
"""Metadata for requests and batches."""
from dataclasses import dataclass
from enum import IntEnum, auto
from typing import List

View File

@@ -1,3 +1,4 @@
"""ModelRunner runs the forward passes of the models."""
import importlib
import importlib.resources
import logging

View File

@@ -1,3 +1,6 @@
"""
The radix tree data structure for managing the KV cache.
"""
import heapq
import time
from collections import defaultdict

View File

@@ -1,3 +1,4 @@
"""Request scheduler heuristic."""
import random
from collections import defaultdict

View File

@@ -1,3 +1,5 @@
"""A tensor parallel worker."""
import asyncio
import logging
import time

View File

@@ -1,3 +1,4 @@
"""DetokenizerManager is a process that detokenizes the token ids."""
import asyncio
import inspect

View File

@@ -1,3 +1,8 @@
"""
The definition of objects transfered between different
processes (TokenizerManager, DetokenizerManager, Controller).
"""
import uuid
from dataclasses import dataclass
from typing import Dict, List, Optional, Union

View File

@@ -1,3 +1,4 @@
"""TokenizerManager is a process that tokenizes the text."""
import asyncio
import concurrent.futures
import dataclasses
@@ -283,7 +284,7 @@ class TokenizerManager:
req = AbortReq(rid)
self.send_to_router.send_pyobj(req)
def create_abort_task(self, obj):
def create_abort_task(self, obj: GenerateReqInput):
# Abort the request if the client is disconnected.
async def abort_request():
await asyncio.sleep(3)

View File

@@ -1,4 +1,4 @@
"""pydantic models for OpenAI API protocol"""
"""Pydantic models for OpenAI API protocol"""
import time
from typing import Dict, List, Optional, Union

View File

@@ -1,4 +1,7 @@
"""SRT: SGLang Runtime"""
"""
The entry point of inference server.
SRT = SGLang Runtime.
"""
import asyncio
import dataclasses
@@ -10,7 +13,7 @@ import sys
import threading
import time
from http import HTTPStatus
from typing import Optional
from typing import Optional, Dict
# Fix a bug of Python threading
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
@@ -148,7 +151,6 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
server_args.dp_size,
)
# Init local models port args
ports = server_args.additional_ports
tp = server_args.tp_size
model_port_args = []
@@ -269,6 +271,12 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
class Runtime:
    """
    A wrapper for the server.
    This is used for launching the server in a python program without
    using the command line interface.
    """
def __init__(
self,
log_level: str = "error",
@@ -339,7 +347,7 @@ class Runtime:
async def add_request(
self,
prompt: str,
sampling_params,
sampling_params: Dict,
):
json_data = {
"text": prompt,

View File

@@ -1,6 +1,4 @@
"""
This file contains the SGL programs used for unit testing.
"""
"""This file contains the SGL programs used for unit testing."""
import json
import re