Fix per-token CUDA quantization kernel when the hidden dim is not divisible by 16 (#8543)
This commit is contained in:
@@ -36,7 +36,7 @@ def sglang_per_token_quant_fp8(
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"num_tokens,hidden_dim",
|
||||
- list(itertools.product([128, 256, 512], [512, 2048, 4096])),
|
||||
+ list(itertools.product([128, 256, 512], [512, 1368, 2048, 4096])),
|
||||
)
|
||||
def test_per_token_quant_compare_implementations(
|
||||
num_tokens: int,
|
||||
|
||||
Reference in New Issue
Block a user