Improve doc strings (#518)

2024-06-08 02:06:52 -07:00
parent e8a2327d52
commit f6dbd24043
22 changed files with 52 additions and 17 deletions
--- a/python/sglang/api.py
+++ b/python/sglang/api.py
@@ -1,4 +1,4 @@
-"""Some Public API Definitions"""
+"""Public APIs of the language."""

 import os
 import re
--- a/python/sglang/launch_server.py
+++ b/python/sglang/launch_server.py
@@ -1,3 +1,5 @@
+"""Launch the inference server."""
+
 import argparse

 from sglang.srt.server import ServerArgs, launch_server
@@ -8,4 +10,4 @@ if __name__ == "__main__":
    args = parser.parse_args()
    server_args = ServerArgs.from_cli_args(args)

-    launch_server(server_args, None)
+    launch_server(server_args, None)
--- a/python/sglang/launch_server_llavavid.py
+++ b/python/sglang/launch_server_llavavid.py
@@ -1,3 +1,4 @@
+"""Launch the inference server for the Llava-video model."""
 import argparse
 import multiprocessing as mp

--- a/python/sglang/srt/constrained/fsm_cache.py
+++ b/python/sglang/srt/constrained/fsm_cache.py
@@ -1,3 +1,4 @@
+"""Cache for the compressed finite state machine."""
 from sglang.srt.constrained import RegexFSM, TransformerTokenizer
 from sglang.srt.constrained.base_cache import BaseCache

--- a/python/sglang/srt/constrained/jump_forward.py
+++ b/python/sglang/srt/constrained/jump_forward.py
@@ -1,3 +1,7 @@
+"""
+Faster constrained decoding.
+Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
+"""
 import interegular

 from sglang.srt.constrained import FSMInfo, disk_cache, make_deterministic_fsm
--- a/python/sglang/srt/conversation.py
+++ b/python/sglang/srt/conversation.py
@@ -1,3 +1,4 @@
+"""Conversation templates."""
 # Adapted from
 # https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
 import dataclasses
--- a/python/sglang/srt/flush_cache.py
+++ b/python/sglang/srt/flush_cache.py
@@ -1,4 +1,6 @@
 """
+Flush the KV cache.
+
 Usage:
 python3 -m sglang.srt.flush_cache --url http://localhost:30000
 """
--- a/python/sglang/srt/layers/logits_processor.py
+++ b/python/sglang/srt/layers/logits_processor.py
@@ -1,3 +1,4 @@
+"""Logits processing."""
 import torch
 from torch import nn
 from vllm.distributed import (
--- a/python/sglang/srt/layers/radix_attention.py
+++ b/python/sglang/srt/layers/radix_attention.py
@@ -1,3 +1,4 @@
+"""Radix attention."""
 import torch
 import numpy as np
 from torch import nn
--- a/python/sglang/srt/managers/controller/dp_worker.py
+++ b/python/sglang/srt/managers/controller/dp_worker.py
@@ -1,4 +1,5 @@
 """A data parallel worker thread."""
+
 import asyncio
 import logging
 import queue
--- a/python/sglang/srt/managers/controller/infer_batch.py
+++ b/python/sglang/srt/managers/controller/infer_batch.py
@@ -1,4 +1,5 @@
 """Meta data for requests and batches"""
+
 from dataclasses import dataclass
 from enum import IntEnum, auto
 from typing import List
--- a/python/sglang/srt/managers/controller/model_runner.py
+++ b/python/sglang/srt/managers/controller/model_runner.py
@@ -1,3 +1,4 @@
+"""ModelRunner runs the forward passes of the models."""
 import importlib
 import importlib.resources
 import logging
--- a/python/sglang/srt/managers/controller/radix_cache.py
+++ b/python/sglang/srt/managers/controller/radix_cache.py
@@ -1,3 +1,6 @@
+"""
+The radix tree data structure for managing the KV cache.
+"""
 import heapq
 import time
 from collections import defaultdict
--- a/python/sglang/srt/managers/controller/schedule_heuristic.py
+++ b/python/sglang/srt/managers/controller/schedule_heuristic.py
@@ -1,3 +1,4 @@
+"""Request scheduler heuristic."""
 import random
 from collections import defaultdict

--- a/python/sglang/srt/managers/controller/tp_worker.py
+++ b/python/sglang/srt/managers/controller/tp_worker.py
@@ -1,3 +1,5 @@
+"""A tensor parallel worker."""
+
 import asyncio
 import logging
 import time
--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -1,3 +1,4 @@
+"""DetokenizerManager is a process that detokenizes the token ids."""
 import asyncio
 import inspect

--- a/python/sglang/srt/managers/io_struct.py
+++ b/python/sglang/srt/managers/io_struct.py
@@ -1,3 +1,8 @@
+"""
+The definition of objects transferred between different
+processes (TokenizerManager, DetokenizerManager, Controller).
+"""
+
 import uuid
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Union
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -1,3 +1,4 @@
+"""TokenizerManager is a process that tokenizes the text."""
 import asyncio
 import concurrent.futures
 import dataclasses
@@ -283,7 +284,7 @@ class TokenizerManager:
        req = AbortReq(rid)
        self.send_to_router.send_pyobj(req)

-    def create_abort_task(self, obj):
+    def create_abort_task(self, obj: GenerateReqInput):
        # Abort the request if the client is disconnected.
        async def abort_request():
            await asyncio.sleep(3)
--- a/python/sglang/srt/openai_protocol.py
+++ b/python/sglang/srt/openai_protocol.py
@@ -1,4 +1,4 @@
-"""pydantic models for OpenAI API protocol"""
+"""Pydantic models for OpenAI API protocol."""

 import time
 from typing import Dict, List, Optional, Union
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -1,4 +1,7 @@
-"""SRT: SGLang Runtime"""
+"""
+The entry point of the inference server.
+SRT = SGLang Runtime.
+"""

 import asyncio
 import dataclasses
@@ -10,7 +13,7 @@ import sys
 import threading
 import time
 from http import HTTPStatus
-from typing import Optional
+from typing import Optional, Dict

 # Fix a bug of Python threading
 setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
@@ -148,7 +151,6 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
        server_args.dp_size,
    )

-    # Init local models port args
    ports = server_args.additional_ports
    tp = server_args.tp_size
    model_port_args = []
@@ -269,6 +271,12 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg


 class Runtime:
+    """
+    A wrapper for the server.
+    This is used for launching the server in a python program without
+    using the command line interface.
+    """
+
    def __init__(
        self,
        log_level: str = "error",
@@ -339,7 +347,7 @@ class Runtime:
    async def add_request(
        self,
        prompt: str,
-        sampling_params,
+        sampling_params: Dict,
    ):
        json_data = {
            "text": prompt,
--- a/python/sglang/test/test_programs.py
+++ b/python/sglang/test/test_programs.py
@@ -1,6 +1,4 @@
-"""
-This file contains the SGL programs used for unit testing.
-"""
+"""This file contains the SGL programs used for unit testing."""

 import json
 import re