[Minor] Fix the style of sgl-kernel (#9332)
This commit is contained in:
@@ -76,11 +76,12 @@ If you modify files protected by code owners, their approval is required to merg
|
||||
- Try to make functions as pure as possible. Avoid in-place modification of arguments.
|
||||
|
||||
## How to update sgl-kernel
|
||||
Since sglang and sgl-kernel are separate Python packages, our current GitHub CI infrastructure does not support updating a kernel and using it immediately within the same pull request (PR). To add a new kernel or modify an existing one in the sgl-kernel package, you must use multiple PRs.
|
||||
Since sglang and sgl-kernel are separate Python packages, our current GitHub CI infrastructure does not support updating a kernel and using it immediately within the same pull request (PR).
|
||||
To add a new kernel or modify an existing one in the sgl-kernel package, you must use multiple PRs.
|
||||
|
||||
Follow these steps:
|
||||
|
||||
1. Submit a PR to update the sgl-kernel source code without using it (e.g., [#8884](https://github.com/sgl-project/sglang/pull/8884/files)).
|
||||
1. Submit a PR to update the sgl-kernel source code without using it in the sglang Python package (e.g., [#8884](https://github.com/sgl-project/sglang/pull/8884/files)).
|
||||
2. Bump the version of sgl-kernel (e.g., [#9220](https://github.com/sgl-project/sglang/pull/9220/files)).
|
||||
- Once merged, this will trigger an automatic release of the sgl-kernel wheel to PyPI.
|
||||
- If it is not urgent, you can wait for other people to release the wheel. A new version is typically released within one week.
|
||||
|
||||
@@ -23,6 +23,7 @@ from sgl_kernel.cutlass_moe import cutlass_w4a8_moe_mm, get_cutlass_w4a8_moe_mm_
|
||||
from sgl_kernel.elementwise import (
|
||||
FusedSetKVBufferArg,
|
||||
apply_rope_with_cos_sin_cache_inplace,
|
||||
downcast_fp8,
|
||||
fused_add_rmsnorm,
|
||||
gelu_and_mul,
|
||||
gelu_tanh_and_mul,
|
||||
@@ -92,6 +93,14 @@ from sgl_kernel.sampling import (
|
||||
top_p_renorm_prob,
|
||||
top_p_sampling_from_probs,
|
||||
)
|
||||
from sgl_kernel.speculative import (
|
||||
build_tree_kernel_efficient,
|
||||
segment_packbits,
|
||||
tree_speculative_sampling_target_only,
|
||||
verify_tree_greedy,
|
||||
)
|
||||
from sgl_kernel.top_k import fast_topk
|
||||
from sgl_kernel.version import __version__
|
||||
|
||||
|
||||
def create_greenctx_stream_by_value(*args, **kwargs):
|
||||
@@ -104,13 +113,3 @@ def get_sm_available(*args, **kwargs):
|
||||
from sgl_kernel.spatial import get_sm_available as _impl
|
||||
|
||||
return _impl(*args, **kwargs)
|
||||
|
||||
|
||||
from sgl_kernel.speculative import (
|
||||
build_tree_kernel_efficient,
|
||||
segment_packbits,
|
||||
tree_speculative_sampling_target_only,
|
||||
verify_tree_greedy,
|
||||
)
|
||||
from sgl_kernel.top_k import fast_topk
|
||||
from sgl_kernel.version import __version__
|
||||
|
||||
@@ -43,12 +43,12 @@ include_dirs = [
|
||||
sources = [
|
||||
"csrc/allreduce/custom_all_reduce.hip",
|
||||
"csrc/allreduce/quick_all_reduce.cu",
|
||||
"csrc/common_extension_rocm.cc",
|
||||
"csrc/elementwise/activation.cu",
|
||||
"csrc/grammar/apply_token_bitmask_inplace_cuda.cu",
|
||||
"csrc/moe/moe_align_kernel.cu",
|
||||
"csrc/moe/moe_topk_softmax_kernels.cu",
|
||||
"csrc/speculative/eagle_utils.cu",
|
||||
"csrc/common_extension_rocm.cc",
|
||||
"csrc/grammar/apply_token_bitmask_inplace_cuda.cu",
|
||||
]
|
||||
|
||||
cxx_flags = ["-O3"]
|
||||
|
||||
Reference in New Issue
Block a user