[Docs] Update contribution guide (#9383)
This commit is contained in:
@@ -72,8 +72,13 @@ If you modify files protected by code owners, their approval is required to merg
|
|||||||
- Avoid code duplication. If the same code snippet (more than five lines) appears multiple times, extract it into a shared function.
|
- Avoid code duplication. If the same code snippet (more than five lines) appears multiple times, extract it into a shared function.
|
||||||
- Minimize device synchronization. Reduce expensive CPU-GPU synchronization operations, such as `tensor.item()` or `tensor.cpu()`, whenever possible. Use vectorized code.
|
- Minimize device synchronization. Reduce expensive CPU-GPU synchronization operations, such as `tensor.item()` or `tensor.cpu()`, whenever possible. Use vectorized code.
|
||||||
- Keep files concise. If a file exceeds 2,000 lines of code, split it into multiple smaller files.
|
- Keep files concise. If a file exceeds 2,000 lines of code, split it into multiple smaller files.
|
||||||
- Prioritize extreme efficiency. SGLang is a runtime, and most of your code runs on the critical path for every request. Optimize every minor overhead as much as possible.
|
- Prioritize extreme efficiency. SGLang is a runtime, and most of your code runs on the critical path for every request. Optimize all minor overheads as much as possible, especially in the model forward code.
|
||||||
- Try to make functions as pure as possible. Avoid in-place modification of arguments.
|
- A common pattern is to perform runtime checks in the model forward pass (e.g., [this](https://github.com/sgl-project/sglang/blob/f1b0eda55c2c4838e8ab90a0fac7fb1e3d7064ab/python/sglang/srt/models/deepseek_v2.py#L486-L491)). The results of these checks are very likely the same for every layer, so please compute them once and cache the result as a single boolean value whenever possible.
|
||||||
|
- Strive to make functions as pure as possible. Avoid in-place modification of arguments.
|
||||||
|
- When supporting new hardware or features, follow these guidelines:
|
||||||
|
- Do not drastically change existing code.
|
||||||
|
- Always prefer adding new files for components that are specific to your new hardware (e.g., `allocator_ascend.py`).
|
||||||
|
- If you write multiple if/else blocks for new features, ensure the common path (e.g., NVIDIA hardware or the existing code path) is the first branch.
|
||||||
|
|
||||||
## How to update sgl-kernel
|
## How to update sgl-kernel
|
||||||
Since sglang and sgl-kernel are separate Python packages, our current GitHub CI infrastructure does not support updating a kernel and using it immediately within the same pull request (PR).
|
Since sglang and sgl-kernel are separate Python packages, our current GitHub CI infrastructure does not support updating a kernel and using it immediately within the same pull request (PR).
|
||||||
|
|||||||
@@ -297,9 +297,6 @@ class ServerArgs:
|
|||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
# Check deprecated arguments
|
# Check deprecated arguments
|
||||||
def print_deprecated_warning(message: str):
|
|
||||||
logger.warning(f"\033[33m{message}\033[0m")
|
|
||||||
|
|
||||||
if self.enable_ep_moe:
|
if self.enable_ep_moe:
|
||||||
self.ep_size = self.tp_size
|
self.ep_size = self.tp_size
|
||||||
print_deprecated_warning(
|
print_deprecated_warning(
|
||||||
@@ -1955,24 +1952,25 @@ class ServerArgs:
|
|||||||
default=None,
|
default=None,
|
||||||
help="The custom dataloader which used to update the model. Should be set with a valid import path, such as my_package.weight_load_func",
|
help="The custom dataloader which used to update the model. Should be set with a valid import path, such as my_package.weight_load_func",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--weight-loader-disable-mmap",
|
||||||
|
action="store_true",
|
||||||
|
help="Disable mmap while loading weight using safetensors.",
|
||||||
|
)
|
||||||
|
|
||||||
|
# For PD-Multiplexing
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--enable-pdmux",
|
"--enable-pdmux",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Enable PD-Multiplexing, PD running on greenctx stream.",
|
help="Enable PD-Multiplexing, PD running on greenctx stream.",
|
||||||
)
|
)
|
||||||
|
|
||||||
# For PD-Multiplexing
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--sm-group-num",
|
"--sm-group-num",
|
||||||
type=int,
|
type=int,
|
||||||
default=ServerArgs.sm_group_num,
|
default=ServerArgs.sm_group_num,
|
||||||
help="Number of sm partition groups.",
|
help="Number of sm partition groups.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--weight-loader-disable-mmap",
|
|
||||||
action="store_true",
|
|
||||||
help="Disable mmap while loading weight using safetensors.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Deprecated arguments
|
# Deprecated arguments
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -2379,6 +2377,10 @@ class DeprecatedAction(argparse.Action):
|
|||||||
raise ValueError(self.help)
|
raise ValueError(self.help)
|
||||||
|
|
||||||
|
|
||||||
|
def print_deprecated_warning(message: str):
|
||||||
|
logger.warning(f"\033[33m{message}\033[0m")
|
||||||
|
|
||||||
|
|
||||||
def auto_choose_speculative_params(self: ServerArgs):
|
def auto_choose_speculative_params(self: ServerArgs):
|
||||||
"""
|
"""
|
||||||
Automatically choose the parameters for speculative decoding.
|
Automatically choose the parameters for speculative decoding.
|
||||||
|
|||||||
Reference in New Issue
Block a user