[Perf] Avoid performing index selection of sin/cos cache every layer (#1890)
Reduce the number of index selections on the sin/cos cache by no longer performing one in every layer.
- vLLM version: v0.10.0
- vLLM main: 656c24f1b5
Signed-off-by: whx-sjtu <2952154980@qq.com>
This commit is contained in:
@@ -1799,6 +1799,9 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
||||
attn_metadata.decode.input_positions)
|
||||
torch._dynamo.mark_static(
|
||||
get_forward_context().mc2_mask)
|
||||
if hasattr(attn_metadata.decode, "sin"):
|
||||
torch._dynamo.mark_static(attn_metadata.decode.sin)
|
||||
torch._dynamo.mark_static(attn_metadata.decode.cos)
|
||||
torch._dynamo.mark_static(attn_metadata.slot_mapping)
|
||||
for kv in self.kv_caches:
|
||||
assert isinstance(
|
||||
|
||||
Reference in New Issue
Block a user