[Platform][Model Runner] Add hash of request_ids; Change blocksize back to 128. (#293)

This PR changes the initial value of the block size back to 128 and adds a hash of the request ID list in the model runner, so that the sampler can implement a sampling-parameter cache. Signed-off-by: hw_whx <wanghexiang7@huawei.com> Co-authored-by: hw_whx <wanghexiang7@huawei.com>
2025-03-11 18:50:28 +08:00
parent 007aeaa48b
commit feb6bdb12e
2 changed files with 6 additions and 2 deletions
--- a/vllm_ascend/worker/model_runner.py
+++ b/vllm_ascend/worker/model_runner.py
@@ -1061,6 +1061,11 @@ class NPUModelRunner(NPUModelRunnerBase[ModelInputForNPUWithSamplingMetadata]):
                # TODO (cmq): enable this after supported in vllm
                # pad_for_invariant_seq_len=True,
            )
+            # Compute a hash of the request ID list; it is used for the sampling-param cache in the sampler.
+            request_ids = model_input.request_ids_to_seq_ids.keys(  # type: ignore
+            )  # type: ignore
+            request_ids_hash = hash("".join(request_ids))
+            sampling_metadata.request_ids_hash = request_ids_hash  # type: ignore
        else:
            sampling_metadata = None
        is_prompt = (seq_group_metadata_list[0].is_prompt