[Worker][V1] Support sleep mode for v1 (#1084)
### What this PR does / why we need it?

Support sleep mode for v1.

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -1235,11 +1235,6 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
||||
# assert self.lora_manager is not None, "LoRA is not enabled"
|
||||
# TODO: call maybe_profile_with_lora()
|
||||
|
||||
- dummy_kv_caches = [
|
||||
-     torch.tensor((), dtype=torch.float32, device=self.device)
|
||||
-     for _ in range(self.num_attn_layers)
|
||||
- ]
|
||||
|
||||
# Trigger compilation for general shape.
|
||||
hidden_states = self._dummy_run(self.max_num_tokens)
|
||||
|
||||
@@ -1250,7 +1245,7 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
||||
logits = None
|
||||
|
||||
NPUPlatform.synchronize()
|
||||
- del hidden_states, logits, dummy_kv_caches
|
||||
+ del hidden_states, logits
|
||||
self.encoder_cache.clear()
|
||||
gc.collect()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user