chore: upgrade sgl-kernel v0.1.8.post2 (#7186)
Co-authored-by: zhyncs <me@zhyncs.com>
This commit is contained in:
@@ -49,7 +49,7 @@ runtime_common = [
|
|||||||
|
|
||||||
srt = [
|
srt = [
|
||||||
"sglang[runtime_common]",
|
"sglang[runtime_common]",
|
||||||
"sgl-kernel==0.1.8.post1",
|
"sgl-kernel==0.1.8.post2",
|
||||||
"flashinfer_python==0.2.6.post1",
|
"flashinfer_python==0.2.6.post1",
|
||||||
"torch==2.7.1",
|
"torch==2.7.1",
|
||||||
"torchaudio==2.7.1",
|
"torchaudio==2.7.1",
|
||||||
|
|||||||
@@ -605,7 +605,7 @@ def _set_envs_and_config(server_args: ServerArgs):
|
|||||||
if _is_cuda:
|
if _is_cuda:
|
||||||
assert_pkg_version(
|
assert_pkg_version(
|
||||||
"sgl-kernel",
|
"sgl-kernel",
|
||||||
"0.1.8.post1",
|
"0.1.8.post2",
|
||||||
"Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
|
"Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -280,6 +280,7 @@ class CutlassMLABackend(FlashInferMLAAttnBackend):
|
|||||||
seq_lens=forward_batch.seq_lens.to(torch.int32),
|
seq_lens=forward_batch.seq_lens.to(torch.int32),
|
||||||
page_table=self.forward_metadata.block_kv_indices,
|
page_table=self.forward_metadata.block_kv_indices,
|
||||||
workspace=self.forward_metadata.workspace,
|
workspace=self.forward_metadata.workspace,
|
||||||
|
sm_scale=layer.scaling,
|
||||||
num_kv_splits=1,
|
num_kv_splits=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -52,7 +52,11 @@ def grouped_gemm_nt_f8f8bf16_masked(
|
|||||||
expected_m, n, k, num_groups, kernel_type
|
expected_m, n, k, num_groups, kernel_type
|
||||||
):
|
):
|
||||||
_grouped_gemm_nt_f8f8bf16_masked_raw(
|
_grouped_gemm_nt_f8f8bf16_masked_raw(
|
||||||
lhs, rhs, out, masked_m, expected_m,
|
lhs,
|
||||||
|
rhs,
|
||||||
|
out,
|
||||||
|
masked_m,
|
||||||
|
expected_m,
|
||||||
**({"recipe": recipe} if DEEPGEMM_V202506 else {})
|
**({"recipe": recipe} if DEEPGEMM_V202506 else {})
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user