[Minor] Fix the style of sgl-kernel (#9332)
This commit is contained in:
@@ -76,11 +76,12 @@ If you modify files protected by code owners, their approval is required to merg
|
|||||||
- Try to make functions as pure as possible. Avoid in-place modification of arguments.
|
- Try to make functions as pure as possible. Avoid in-place modification of arguments.
|
||||||
|
|
||||||
## How to update sgl-kernel
|
## How to update sgl-kernel
|
||||||
Since sglang and sgl-kernel are separate Python packages, our current GitHub CI infrastructure does not support updating a kernel and using it immediately within the same pull request (PR). To add a new kernel or modify an existing one in the sgl-kernel package, you must use multiple PRs.
|
Since sglang and sgl-kernel are separate Python packages, our current GitHub CI infrastructure does not support updating a kernel and using it immediately within the same pull request (PR).
|
||||||
|
To add a new kernel or modify an existing one in the sgl-kernel package, you must use multiple PRs.
|
||||||
|
|
||||||
Follow these steps:
|
Follow these steps:
|
||||||
|
|
||||||
1. Submit a PR to update the sgl-kernel source code without using it (e.g., [#8884](https://github.com/sgl-project/sglang/pull/8884/files)).
|
1. Submit a PR to update the sgl-kernel source code without using it in the sglang Python package (e.g., [#8884](https://github.com/sgl-project/sglang/pull/8884/files)).
|
||||||
2. Bump the version of sgl-kernel (e.g., [#9220](https://github.com/sgl-project/sglang/pull/9220/files)).
|
2. Bump the version of sgl-kernel (e.g., [#9220](https://github.com/sgl-project/sglang/pull/9220/files)).
|
||||||
- Once merged, this will trigger an automatic release of the sgl-kernel wheel to PyPI.
|
- Once merged, this will trigger an automatic release of the sgl-kernel wheel to PyPI.
|
||||||
- If not urgent, you can wait for other people to release the wheel. A new version will typically be released within one week.
|
- If not urgent, you can wait for other people to release the wheel. A new version will typically be released within one week.
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ from sgl_kernel.cutlass_moe import cutlass_w4a8_moe_mm, get_cutlass_w4a8_moe_mm_
|
|||||||
from sgl_kernel.elementwise import (
|
from sgl_kernel.elementwise import (
|
||||||
FusedSetKVBufferArg,
|
FusedSetKVBufferArg,
|
||||||
apply_rope_with_cos_sin_cache_inplace,
|
apply_rope_with_cos_sin_cache_inplace,
|
||||||
|
downcast_fp8,
|
||||||
fused_add_rmsnorm,
|
fused_add_rmsnorm,
|
||||||
gelu_and_mul,
|
gelu_and_mul,
|
||||||
gelu_tanh_and_mul,
|
gelu_tanh_and_mul,
|
||||||
@@ -92,6 +93,14 @@ from sgl_kernel.sampling import (
|
|||||||
top_p_renorm_prob,
|
top_p_renorm_prob,
|
||||||
top_p_sampling_from_probs,
|
top_p_sampling_from_probs,
|
||||||
)
|
)
|
||||||
|
from sgl_kernel.speculative import (
|
||||||
|
build_tree_kernel_efficient,
|
||||||
|
segment_packbits,
|
||||||
|
tree_speculative_sampling_target_only,
|
||||||
|
verify_tree_greedy,
|
||||||
|
)
|
||||||
|
from sgl_kernel.top_k import fast_topk
|
||||||
|
from sgl_kernel.version import __version__
|
||||||
|
|
||||||
|
|
||||||
def create_greenctx_stream_by_value(*args, **kwargs):
|
def create_greenctx_stream_by_value(*args, **kwargs):
|
||||||
@@ -104,13 +113,3 @@ def get_sm_available(*args, **kwargs):
|
|||||||
from sgl_kernel.spatial import get_sm_available as _impl
|
from sgl_kernel.spatial import get_sm_available as _impl
|
||||||
|
|
||||||
return _impl(*args, **kwargs)
|
return _impl(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
from sgl_kernel.speculative import (
|
|
||||||
build_tree_kernel_efficient,
|
|
||||||
segment_packbits,
|
|
||||||
tree_speculative_sampling_target_only,
|
|
||||||
verify_tree_greedy,
|
|
||||||
)
|
|
||||||
from sgl_kernel.top_k import fast_topk
|
|
||||||
from sgl_kernel.version import __version__
|
|
||||||
|
|||||||
@@ -43,12 +43,12 @@ include_dirs = [
|
|||||||
sources = [
|
sources = [
|
||||||
"csrc/allreduce/custom_all_reduce.hip",
|
"csrc/allreduce/custom_all_reduce.hip",
|
||||||
"csrc/allreduce/quick_all_reduce.cu",
|
"csrc/allreduce/quick_all_reduce.cu",
|
||||||
|
"csrc/common_extension_rocm.cc",
|
||||||
"csrc/elementwise/activation.cu",
|
"csrc/elementwise/activation.cu",
|
||||||
|
"csrc/grammar/apply_token_bitmask_inplace_cuda.cu",
|
||||||
"csrc/moe/moe_align_kernel.cu",
|
"csrc/moe/moe_align_kernel.cu",
|
||||||
"csrc/moe/moe_topk_softmax_kernels.cu",
|
"csrc/moe/moe_topk_softmax_kernels.cu",
|
||||||
"csrc/speculative/eagle_utils.cu",
|
"csrc/speculative/eagle_utils.cu",
|
||||||
"csrc/common_extension_rocm.cc",
|
|
||||||
"csrc/grammar/apply_token_bitmask_inplace_cuda.cu",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
cxx_flags = ["-O3"]
|
cxx_flags = ["-O3"]
|
||||||
|
|||||||
Reference in New Issue
Block a user