[feat] mlapo add bf16 no_quant support (#4852)
### What this PR does / why we need it?
This PR adds mlapo operation support for bf16 no_quant mode.
### Does this PR introduce _any_ user-facing change?
This PR makes quant-related parameters optional.
### How was this patch tested?
CI passed with newly added and existing tests.
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: chenjunyi <isjunyi.chen@gmail.com>
This commit is contained in:
@@ -90,6 +90,11 @@ struct MlaTilingData {
|
||||
uint32_t esqHeadTail{0};
|
||||
uint32_t esqColLoop{0};
|
||||
uint32_t esqColTail{0};
|
||||
|
||||
// hidden state dimension
|
||||
uint32_t hiddenStateDim{7168};
|
||||
|
||||
uint32_t isWeightQuantized{1};
|
||||
};
|
||||
|
||||
#endif // MLAPREPROCESS_TILING_H
|
||||
|
||||
Reference in New Issue
Block a user