fix: remove redundant rotary embedding cache recomputation in MiniCPM (#8022)
This commit is contained in:
@@ -138,8 +138,6 @@ class MiniCPMAttention(nn.Module):
|
|||||||
base=rope_theta,
|
base=rope_theta,
|
||||||
rope_scaling=rope_scaling,
|
rope_scaling=rope_scaling,
|
||||||
)
|
)
|
||||||
# set rope as fp32 instead of bf16
|
|
||||||
self.rotary_emb.cos_sin_cache = self.rotary_emb._compute_cos_sin_cache()
|
|
||||||
self.attn = RadixAttention(
|
self.attn = RadixAttention(
|
||||||
self.num_heads,
|
self.num_heads,
|
||||||
self.head_dim,
|
self.head_dim,
|
||||||
|
|||||||
Reference in New Issue
Block a user