Separate allocation logic from scheduler (#11313)

2025-10-10 17:38:54 -07:00
parent 9aa4502d11
commit b36afed4a7
7 changed files with 545 additions and 399 deletions
--- a/test/srt/test_forward_split_prefill.py
+++ b/test/srt/test_forward_split_prefill.py
@@ -8,6 +8,7 @@ python3 test_forward_split_prefill.py
 """

 import unittest
+from types import SimpleNamespace

 import numpy as np
 import torch
@@ -95,11 +96,18 @@ class TestForwardSplitPrefill(CustomTestCase):
            req.logprob_start_len = len(req.origin_input_ids) - 1
            reqs.append(req)

+        # Create dummy tree_cache for tests (no prefix caching, just allocation)
+        dummy_tree_cache = SimpleNamespace(
+            page_size=1,
+            device=self.model_runner.device,
+            token_to_kv_pool_allocator=self.model_runner.token_to_kv_pool_allocator,
+        )
+
        batch = ScheduleBatch.init_new(
            reqs=reqs,
            req_to_token_pool=self.model_runner.req_to_token_pool,
            token_to_kv_pool_allocator=self.model_runner.token_to_kv_pool_allocator,
-            tree_cache=None,
+            tree_cache=dummy_tree_cache,
            model_config=self.model_config,
            enable_overlap=False,
            spec_algorithm=SpeculativeAlgorithm.NONE,