[0.18.0][BugFix] Update capture sizes after rounding operations (#8380)
### What this PR does / why we need it? This PR is partially cherry-picked from #8172. It fixes mismatched capture sizes after rounding operations when using sp or speculative decoding. The reason is that the original `self.cudagraph_capture_sizes` is no longer updated and remains at its initial sizes. Now we use `self.cudagraph_dispatcher.get_capture_descs` to get the up-to-date sizes. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? By CI. Signed-off-by: Zetong Li <slippersss@126.com>
This commit is contained in:
@@ -3298,12 +3298,19 @@ class NPUModelRunner(GPUModelRunner):
|
||||
with update_pass_config(self):
|
||||
super()._check_and_update_cudagraph_mode(attention_backends, kv_cache_groups)
|
||||
|
||||
capture_descs = self.cudagraph_dispatcher.get_capture_descs()
|
||||
capture_sizes = sorted({
|
||||
desc.num_tokens
|
||||
for _, descs in capture_descs
|
||||
for desc in descs
|
||||
})
|
||||
|
||||
# NOTE: Since aclgraph_batch_sizes cannot be determined until here,
|
||||
# we set the graph params right before initializing the keys.
|
||||
if self.use_aclgraph:
|
||||
set_graph_params(self.cudagraph_batch_sizes)
|
||||
set_graph_params(capture_sizes)
|
||||
if self.speculative_config:
|
||||
set_draft_graph_params(self.cudagraph_batch_sizes)
|
||||
set_draft_graph_params(capture_sizes)
|
||||
|
||||
def capture_model(self) -> None:
|
||||
gpu_model_runner_cls = next((cls for cls in self.__class__.__mro__ if cls.__name__ == "GPUModelRunner"), None)
|
||||
|
||||
Reference in New Issue
Block a user