Separate allocation logic from scheduler (#11313)

This commit is contained in:
cctry
2025-10-10 17:38:54 -07:00
committed by GitHub
parent 9aa4502d11
commit b36afed4a7
7 changed files with 545 additions and 399 deletions

View File

@@ -8,6 +8,7 @@ python3 test_forward_split_prefill.py
"""
import unittest
from types import SimpleNamespace
import numpy as np
import torch
@@ -95,11 +96,18 @@ class TestForwardSplitPrefill(CustomTestCase):
req.logprob_start_len = len(req.origin_input_ids) - 1
reqs.append(req)
# Create dummy tree_cache for tests (no prefix caching, just allocation)
dummy_tree_cache = SimpleNamespace(
page_size=1,
device=self.model_runner.device,
token_to_kv_pool_allocator=self.model_runner.token_to_kv_pool_allocator,
)
batch = ScheduleBatch.init_new(
reqs=reqs,
req_to_token_pool=self.model_runner.req_to_token_pool,
token_to_kv_pool_allocator=self.model_runner.token_to_kv_pool_allocator,
-tree_cache=None,
+tree_cache=dummy_tree_cache,
model_config=self.model_config,
enable_overlap=False,
spec_algorithm=SpeculativeAlgorithm.NONE,