diff --git a/docs/developer_guide/contribution_guide.md b/docs/developer_guide/contribution_guide.md index 337ff77d2..e2171f447 100644 --- a/docs/developer_guide/contribution_guide.md +++ b/docs/developer_guide/contribution_guide.md @@ -76,11 +76,12 @@ If you modify files protected by code owners, their approval is required to merg - Try to make functions as pure as possible. Avoid in-place modification of arguments. ## How to update sgl-kernel -Since sglang and sgl-kernel are separate Python packages, our current GitHub CI infrastructure does not support updating a kernel and using it immediately within the same pull request (PR). To add a new kernel or modify an existing one in the sgl-kernel package, you must use multiple PRs. +Since sglang and sgl-kernel are separate Python packages, our current GitHub CI infrastructure does not support updating a kernel and using it immediately within the same pull request (PR). +To add a new kernel or modify an existing one in the sgl-kernel package, you must use multiple PRs. Follow these steps: -1. Submit a PR to update the sgl-kernel source code without using it (e.g., [#8884](https://github.com/sgl-project/sglang/pull/8884/files)). +1. Submit a PR to update the sgl-kernel source code without using it in the sglang Python package (e.g., [#8884](https://github.com/sgl-project/sglang/pull/8884/files)). 2. Bump the version of sgl-kernel (e.g., [#9220](https://github.com/sgl-project/sglang/pull/9220/files)). - Once merged, this will trigger an automatic release of the sgl-kernel wheel to PyPI. - If not urgent, you can wait for other people to release the wheel. A new version will typically be released within one week. 
diff --git a/sgl-kernel/python/sgl_kernel/__init__.py b/sgl-kernel/python/sgl_kernel/__init__.py index 515aa4adf..6480a097d 100755 --- a/sgl-kernel/python/sgl_kernel/__init__.py +++ b/sgl-kernel/python/sgl_kernel/__init__.py @@ -23,6 +23,7 @@ from sgl_kernel.cutlass_moe import cutlass_w4a8_moe_mm, get_cutlass_w4a8_moe_mm_ from sgl_kernel.elementwise import ( FusedSetKVBufferArg, apply_rope_with_cos_sin_cache_inplace, + downcast_fp8, fused_add_rmsnorm, gelu_and_mul, gelu_tanh_and_mul, @@ -92,6 +93,14 @@ from sgl_kernel.sampling import ( top_p_renorm_prob, top_p_sampling_from_probs, ) +from sgl_kernel.speculative import ( + build_tree_kernel_efficient, + segment_packbits, + tree_speculative_sampling_target_only, + verify_tree_greedy, +) +from sgl_kernel.top_k import fast_topk +from sgl_kernel.version import __version__ def create_greenctx_stream_by_value(*args, **kwargs): @@ -104,13 +113,3 @@ def get_sm_available(*args, **kwargs): from sgl_kernel.spatial import get_sm_available as _impl return _impl(*args, **kwargs) - - -from sgl_kernel.speculative import ( - build_tree_kernel_efficient, - segment_packbits, - tree_speculative_sampling_target_only, - verify_tree_greedy, -) -from sgl_kernel.top_k import fast_topk -from sgl_kernel.version import __version__ diff --git a/sgl-kernel/setup_rocm.py b/sgl-kernel/setup_rocm.py index 02c2019ff..2105c7c1f 100644 --- a/sgl-kernel/setup_rocm.py +++ b/sgl-kernel/setup_rocm.py @@ -43,12 +43,12 @@ include_dirs = [ sources = [ "csrc/allreduce/custom_all_reduce.hip", "csrc/allreduce/quick_all_reduce.cu", + "csrc/common_extension_rocm.cc", "csrc/elementwise/activation.cu", + "csrc/grammar/apply_token_bitmask_inplace_cuda.cu", "csrc/moe/moe_align_kernel.cu", "csrc/moe/moe_topk_softmax_kernels.cu", "csrc/speculative/eagle_utils.cu", - "csrc/common_extension_rocm.cc", - "csrc/grammar/apply_token_bitmask_inplace_cuda.cu", ] cxx_flags = ["-O3"]