Fix loading KV quantization scale; Enable modelopt kv cache (#4686)
Co-authored-by: qingquansong <ustcsqq@gmail.com>
This commit is contained in:
@@ -193,6 +193,7 @@ class Gemma3Attention(nn.Module):
|
||||
# Module must also define `get_attention_sliding_window_size` to correctly initialize
|
||||
# attention backend in `ForwardBatch`.
|
||||
sliding_window_size=self.sliding_window,
|
||||
quant_config=quant_config,
|
||||
prefix=add_prefix("attn", prefix),
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user