[1/2] Support Qserve (#6457)

Co-authored-by: yych0745 <1398089567@qq.com>
Co-authored-by: sleepcoo <sleepcoo@gmail.com>
This commit is contained in:
HandH1998
2025-05-22 10:48:59 +08:00
committed by GitHub
parent 6ce0ed073b
commit 4d643f6c7a
10 changed files with 2086 additions and 0 deletions

View File

@@ -404,3 +404,24 @@ void convert_vertical_slash_indexes_mergehead(
* From XGrammar
*/
/*
 * Declaration only; the definition is not visible in this chunk.
 * Takes `logits` and `bitmask` tensors by value plus an optional `indices`
 * tensor that defaults to at::nullopt, and returns nothing.
 * NOTE(review): the name suggests `logits` is masked in place using `bitmask`,
 * with `indices` presumably selecting which rows to touch - confirm against
 * the implementation.
 */
void ApplyTokenBitmaskInplace(at::Tensor logits, at::Tensor bitmask, at::optional<at::Tensor> indices = at::nullopt);
/*
* From QServe
*/
/*
 * QServe GEMM entry point, "per_chn" variant (declaration only; the
 * definition is not visible in this chunk).
 * Accepts six read-only tensor references and one mutable tensor reference,
 * `_out_feats`, and returns nothing. `_out_feats` is the only non-const
 * parameter, so it is presumably where the result is written - TODO confirm.
 * NOTE(review): "w4a8" presumably means 4-bit weights with 8-bit activations
 * and "per_chn" per-channel weight quantization; expected shapes and dtypes
 * are not shown here - verify against the kernel source.
 */
void qserve_w4a8_per_chn_gemm(
const torch::Tensor& _in_feats,  // presumably the quantized activations - TODO confirm
const torch::Tensor& _kernel,    // presumably the packed quantized weights - TODO confirm
const torch::Tensor& _wscales,   // presumably weight scales - TODO confirm
const torch::Tensor& _ascales,   // presumably activation scales - TODO confirm
const torch::Tensor& _w_szs,     // NOTE(review): looks like weight scale/zero-point terms - confirm
const torch::Tensor& _a_ssums,   // NOTE(review): looks like activation sum terms - confirm
torch::Tensor& _out_feats);      // only mutable argument
/*
 * QServe GEMM entry point, "per_group" variant (declaration only; the
 * definition is not visible in this chunk).
 * Accepts six read-only tensor references and one mutable tensor reference,
 * `_out_feats`, and returns nothing. `_out_feats` is the only non-const
 * parameter, so it is presumably where the result is written - TODO confirm.
 * Compared with qserve_w4a8_per_chn_gemm, this signature takes `_zeros` and
 * `_scales_i8` in place of `_w_szs` and `_a_ssums`.
 * NOTE(review): "w4a8" presumably means 4-bit weights with 8-bit activations
 * and "per_group" group-wise weight quantization; expected shapes and dtypes
 * are not shown here - verify against the kernel source.
 */
void qserve_w4a8_per_group_gemm(
const torch::Tensor& _in_feats,   // presumably the quantized activations - TODO confirm
const torch::Tensor& _kernel,     // presumably the packed quantized weights - TODO confirm
const torch::Tensor& _zeros,      // presumably per-group zero points - TODO confirm
const torch::Tensor& _scales_i8,  // NOTE(review): name suggests int8 per-group scales - confirm
const torch::Tensor& _wscales,    // presumably weight scales - TODO confirm
const torch::Tensor& _ascales,    // presumably activation scales - TODO confirm
torch::Tensor& _out_feats);       // only mutable argument