fix: remove redundant rotary embedding cache recomputation in MiniCPM (#8022)
This commit is contained in:
@@ -138,8 +138,6 @@ class MiniCPMAttention(nn.Module):
  base=rope_theta,
  rope_scaling=rope_scaling,
  )
- # set rope as fp32 instead of bf16
- self.rotary_emb.cos_sin_cache = self.rotary_emb._compute_cos_sin_cache()
  self.attn = RadixAttention(
  self.num_heads,
  self.head_dim,
Reference in New Issue
Block a user