[Worker][V1] Support sleep mode for v1 (#1084)

### What this PR does / why we need it?
Add sleep mode support for the V1 worker.

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-06-06 21:54:02 +08:00
committed by GitHub
parent 0395ab30be
commit a2552e10e4
5 changed files with 65 additions and 60 deletions

View File

@@ -1235,11 +1235,6 @@ class NPUModelRunner(LoRAModelRunnerMixin):
# assert self.lora_manager is not None, "LoRA is not enabled"
# TODO: call maybe_profile_with_lora()
dummy_kv_caches = [
torch.tensor((), dtype=torch.float32, device=self.device)
for _ in range(self.num_attn_layers)
]
# Trigger compilation for general shape.
hidden_states = self._dummy_run(self.max_num_tokens)
@@ -1250,7 +1245,7 @@ class NPUModelRunner(LoRAModelRunnerMixin):
logits = None
NPUPlatform.synchronize()
del hidden_states, logits, dummy_kv_caches
del hidden_states, logits
self.encoder_cache.clear()
gc.collect()