# sglang/test/srt/test_radix_cache_unit.py

"""
Unit tests for the RadixCache implementation.

This module tests the core functionality of RadixCache, RadixKey, and TreeNode
following SGLang testing patterns.

Test Coverage:
- RadixKey: token ID management, slicing, iteration, representation
- TreeNode: node properties, reference counting, hash values
- RadixCache: insert/match operations, eviction, page alignment, error handling
- Cache events and request handling
- Boundary conditions with parameterized testing

Usage:
    python test_radix_cache_unit.py
    python -m pytest test_radix_cache_unit.py -v
    python -m pytest test_radix_cache_unit.py::TestRadixCache::test_insert_and_match_basic
"""

import time
import unittest
import unittest.mock

import torch

from sglang.srt.disaggregation.kv_events import BlockRemoved, BlockStored
from sglang.srt.mem_cache.radix_cache import RadixCache, RadixKey, TreeNode

# Test constants
# NOTE(review): DEFAULT_PAGE_SIZE is not referenced by any test visible in this
# module (each test passes its own page_size) - confirm nothing imports it
# before removing.
DEFAULT_PAGE_SIZE = 4


class TestRadixKey(unittest.TestCase):
    """Test cases for RadixKey class."""

    def test_init_basic(self):
        """Test basic initialization of RadixKey."""
        token_ids = [1, 2, 3, 4]
        key = RadixKey(token_ids)
        self.assertEqual(key.token_ids, token_ids)
        self.assertIsNone(key.extra_key)

    def test_init_with_extra_key(self):
        """Test initialization with extra_key."""
        token_ids = [1, 2, 3]
        extra_key = "test_key"
        key = RadixKey(token_ids, extra_key)
        self.assertEqual(key.token_ids, token_ids)
        self.assertEqual(key.extra_key, extra_key)

    def test_len(self):
        """Test __len__ method."""
        key = RadixKey([1, 2, 3])
        self.assertEqual(len(key), 3)

        empty_key = RadixKey([])
        self.assertEqual(len(empty_key), 0)

    def test_iter(self):
        """Test __iter__ method."""
        token_ids = [1, 2, 3, 4]
        key = RadixKey(token_ids)
        self.assertEqual(list(key), token_ids)

    def test_len_and_iter(self):
        """Test __len__ and __iter__ methods."""
        test_cases = [
            ([1, 2, 3], 3),
            ([], 0),
            ([42], 1),
        ]

        for tokens, expected in test_cases:
            with self.subTest(tokens=tokens):
                key = RadixKey(tokens)
                self.assertEqual(len(key), expected)
                self.assertEqual(list(key), tokens)

    def test_getitem_int(self):
        """Test __getitem__ with int index."""
        test_cases = [
            ([10, 20, 30], 0, [10]),
            ([10, 20, 30], -1, [30]),
            ([10, 20, 30], 2, [30]),
        ]

        for tokens, index, expected in test_cases:
            with self.subTest(tokens=tokens, index=index):
                key = RadixKey(tokens)
                result = key[index]
                self.assertIsInstance(result, RadixKey)
                self.assertEqual(result.token_ids, expected)

    def test_getitem_slice(self):
        """Test __getitem__ with slice and edge cases."""
        key = RadixKey([1, 2, 3, 4, 5], "extra")

        # Basic slice
        sliced = key[1:4]
        self.assertIsInstance(sliced, RadixKey)
        self.assertEqual(sliced.token_ids, [2, 3, 4])
        self.assertEqual(sliced.extra_key, "extra")

        # Edge cases
        self.assertEqual(key[2:2].token_ids, [])  # Empty slice
        self.assertEqual(key[:].token_ids, [1, 2, 3, 4, 5])  # Full slice

    def test_getitem_invalid_index(self):
        """Test __getitem__ with invalid indices."""
        key = RadixKey([1, 2, 3])
        with self.assertRaises(IndexError):
            _ = key[10]  # Out of bounds

    def test_repr(self):
        """Test __repr__ method."""
        key = RadixKey([1, 2, 3], "test")
        repr_str = repr(key)
        self.assertIn("RadixKey", repr_str)
        self.assertIn("extra_key='test'", repr_str)
        self.assertIn("[1, 2, 3]", repr_str)

    def test_repr_long_token_ids(self):
        """Test __repr__ with long token_ids."""
        long_tokens = list(range(15))
        key = RadixKey(long_tokens)
        repr_str = repr(key)
        self.assertIn("...", repr_str)  # Should be truncated


class TestTreeNode(unittest.TestCase):
    """Test cases for TreeNode class."""

    def setUp(self):
        """Reset the counter before each test."""
        TreeNode.counter = 0

    def test_init_basic(self):
        """Test basic initialization of TreeNode."""
        node = TreeNode()
        self.assertEqual(node.id, 0)
        self.assertEqual(len(node.children), 0)
        self.assertIsNone(node.parent)
        self.assertIsNone(node.key)
        self.assertIsNone(node.value)
        self.assertEqual(node.lock_ref, 0)
        self.assertEqual(node.hit_count, 0)
        self.assertEqual(node.host_ref_counter, 0)
        self.assertIsNone(node.host_value)
        self.assertIsNone(node.hash_value)

    def test_init_with_id(self):
        """Test initialization with custom ID."""
        node = TreeNode(id=42)
        self.assertEqual(node.id, 42)
        node2 = TreeNode()
        self.assertEqual(node2.id, 1)  # Counter was incremented

    def test_counter_increment(self):
        """Test that counter increments properly."""
        node1 = TreeNode()
        node2 = TreeNode()
        self.assertEqual(node1.id, 0)
        self.assertEqual(node2.id, 1)

    def test_evicted_backuped_properties(self):
        """Test evicted and backuped properties."""
        test_cases = [
            (False, False, True, False),
            (True, False, False, False),
            (True, True, False, True),
            (False, True, True, True),
        ]

        for (
            has_value,
            has_host_value,
            expected_evicted,
            expected_backuped,
        ) in test_cases:
            with self.subTest(has_value=has_value, has_host_value=has_host_value):
                node = TreeNode()

                if has_value:
                    node.value = torch.tensor([1, 2, 3])
                if has_host_value:
                    node.host_value = torch.tensor([4, 5, 6])

                self.assertEqual(node.evicted, expected_evicted)
                self.assertEqual(node.backuped, expected_backuped)

    def test_protect_release_host(self):
        """Test protect_host and release_host methods."""
        node = TreeNode()
        self.assertEqual(node.host_ref_counter, 0)

        node.protect_host()
        self.assertEqual(node.host_ref_counter, 1)

        node.release_host()
        self.assertEqual(node.host_ref_counter, 0)

        # Test error case
        with self.assertRaises(RuntimeError):
            node.release_host()

    def test_get_last_hash_value(self):
        """Test get_last_hash_value method."""
        node = TreeNode()
        self.assertIsNone(node.get_last_hash_value())

        node.hash_value = ["hash1", "hash2", "hash3"]
        self.assertEqual(node.get_last_hash_value(), "hash3")

    def test_lt_comparison(self):
        """Test less than comparison based on last_access_time."""
        node1 = TreeNode()
        time.sleep(0.001)  # Small delay to ensure different timestamps
        node2 = TreeNode()

        self.assertTrue(node1 < node2)
        self.assertFalse(node2 < node1)


class TestRadixCache(unittest.TestCase):
    """Test cases for RadixCache class."""

    def setUp(self):
        """Set up test fixtures."""
        TreeNode.counter = 0

    def test_init_variations(self):
        """Test cache initialization with different parameters."""
        test_cases = [
            (1, False, False),
            (4, False, True),
            (1, True, False),
        ]

        for page_size, disable, enable_events in test_cases:
            with self.subTest(
                page_size=page_size, disable=disable, enable_events=enable_events
            ):
                cache = RadixCache(
                    req_to_token_pool=None,
                    token_to_kv_pool_allocator=None,
                    page_size=page_size,
                    disable=disable,
                    enable_kv_cache_events=enable_events,
                )

                self.assertEqual(cache.page_size, page_size)
                self.assertEqual(cache.disable, disable)
                self.assertEqual(cache.enable_kv_cache_events, enable_events)
                self.assertEqual(cache.device, torch.device("cpu"))
                self.assertIsNotNone(cache.root_node)
                self.assertEqual(len(cache.root_node.key), 0)

    def test_reset(self):
        """Test reset method."""
        cache = RadixCache(
            req_to_token_pool=None, token_to_kv_pool_allocator=None, page_size=1
        )

        # Insert some data
        cache.insert(RadixKey([1, 2, 3]), torch.tensor([10, 20, 30], dtype=torch.int64))
        self.assertGreater(cache.total_size(), 0)

        # Reset
        cache.reset()
        self.assertEqual(cache.total_size(), 0)
        self.assertEqual(cache.evictable_size(), 0)
        self.assertEqual(cache.protected_size(), 0)

    def test_insert_and_match_basic(self):
        """Test basic insert and match operations."""
        for disable_cache in [False, True]:
            with self.subTest(disable_cache=disable_cache):
                cache = RadixCache(
                    req_to_token_pool=None,
                    token_to_kv_pool_allocator=None,
                    page_size=1,
                    disable=disable_cache,
                )

                key = RadixKey([1, 2, 3])
                value = torch.tensor([10, 20, 30], dtype=torch.int64)
                prefix_len = cache.insert(key, value)

                if disable_cache:
                    self.assertEqual(prefix_len, 0)
                    self.assertEqual(cache.total_size(), 0)
                    continue

                self.assertEqual(prefix_len, 0)  # No existing prefix
                self.assertEqual(cache.total_size(), 3)
                self.assertEqual(cache.evictable_size(), 3)

                # Test match_prefix
                result = cache.match_prefix(RadixKey([1, 2, 3]))
                self.assertEqual(len(result.device_indices), 3)
                torch.testing.assert_close(result.device_indices, value)

                # Test partial match
                result = cache.match_prefix(RadixKey([1, 2]))
                self.assertEqual(len(result.device_indices), 2)
                torch.testing.assert_close(
                    result.device_indices, torch.tensor([10, 20], dtype=torch.int64)
                )

    def test_insert_and_match_eagle(self):
        """Test insert and match operations for EAGLE."""
        cache = RadixCache(
            req_to_token_pool=None,
            token_to_kv_pool_allocator=None,
            page_size=1,
            disable=False,
            is_eagle=True,
        )

        key = RadixKey([1, 2, 3, 4])
        value = torch.tensor([10, 20, 30, 40], dtype=torch.int64)
        prefix_len = cache.insert(key, value)

        self.assertEqual(prefix_len, 0)  # No existing prefix
        self.assertEqual(
            cache.total_size(), 3
        )  # The last token is ignored in bigram key
        self.assertEqual(cache.evictable_size(), 3)

        # Test match_prefix
        result = cache.match_prefix(RadixKey([1, 2, 3, 4]))
        self.assertEqual(len(result.device_indices), 3)
        torch.testing.assert_close(
            result.device_indices, torch.tensor([10, 20, 30], dtype=torch.int64)
        )

        # Test partial match
        result = cache.match_prefix(RadixKey([1, 2]))
        self.assertEqual(len(result.device_indices), 1)
        torch.testing.assert_close(
            result.device_indices, torch.tensor([10], dtype=torch.int64)
        )

    def test_insert_and_match_eagle_page_size(self):
        """Test insert and match operations for EAGLE and page_size > 1."""
        cache = RadixCache(
            req_to_token_pool=None,
            token_to_kv_pool_allocator=None,
            page_size=2,
            disable=False,
            is_eagle=True,
        )

        key = RadixKey([1, 2, 3])
        value = torch.tensor([10, 20, 30], dtype=torch.int64)
        prefix_len = cache.insert(key, value)

        self.assertEqual(prefix_len, 0)  # No existing prefix
        self.assertEqual(cache.total_size(), 2)  # only one page is inserted
        self.assertEqual(cache.evictable_size(), 2)

        # Test match_prefix
        result = cache.match_prefix(RadixKey([1, 2, 3, 4]))
        self.assertEqual(len(result.device_indices), 2)
        torch.testing.assert_close(
            result.device_indices, torch.tensor([10, 20], dtype=torch.int64)
        )

        # Test unmatched
        result = cache.match_prefix(RadixKey([1, 2]))
        self.assertEqual(len(result.device_indices), 0)
        torch.testing.assert_close(
            result.device_indices, torch.tensor([], dtype=torch.int64)
        )

    def test_insert_with_none_value(self):
        """Test insert with None value (should use token_ids as list)."""
        cache = RadixCache(
            req_to_token_pool=None, token_to_kv_pool_allocator=None, page_size=1
        )

        key = RadixKey([1, 2, 3])
        prefix_len = cache.insert(key, None)

        # When None is passed, it should create value from token_ids
        self.assertEqual(prefix_len, 0)
        self.assertEqual(cache.total_size(), 3)

    def test_total_size(self):
        """Test total_size calculation."""
        cache = RadixCache(
            req_to_token_pool=None, token_to_kv_pool_allocator=None, page_size=1
        )

        self.assertEqual(cache.total_size(), 0)

        cache.insert(RadixKey([1, 2, 3]), torch.tensor([10, 20, 30], dtype=torch.int64))
        self.assertEqual(cache.total_size(), 3)

        cache.insert(RadixKey([4, 5]), torch.tensor([40, 50], dtype=torch.int64))
        self.assertEqual(cache.total_size(), 5)

    def test_kv_cache_events(self):
        """Test KV cache events functionality."""
        test_cases = [
            (1, True),
            (2, True),
            (1, False),
        ]

        for page_size, enable_events in test_cases:
            with self.subTest(page_size=page_size, enable_events=enable_events):
                cache = RadixCache(
                    req_to_token_pool=None,
                    token_to_kv_pool_allocator=None,
                    page_size=page_size,
                    enable_kv_cache_events=enable_events,
                )

                # Insert data
                cache.insert(RadixKey([1, 2, 3, 4, 5]), None)

                # Take events
                events = cache.take_events()

                if enable_events:
                    self.assertGreater(len(events), 0)
                    # Verify events include BlockStored events (there might be other event types)
                    block_stored_events = [
                        e for e in events if isinstance(e, BlockStored)
                    ]
                    self.assertGreater(len(block_stored_events), 0)
                    for event in block_stored_events:
                        self.assertLessEqual(len(event.token_ids), page_size)
                else:
                    self.assertEqual(len(events), 0)

    def test_kv_cache_events_with_eviction(self):
        """Test KV cache events include removal events."""
        mock_allocator = unittest.mock.Mock()
        mock_allocator.device = torch.device("cpu")

        cache = RadixCache(
            req_to_token_pool=None,
            token_to_kv_pool_allocator=mock_allocator,
            page_size=1,
            enable_kv_cache_events=True,
        )

        # Insert and then evict data
        cache.insert(RadixKey([1, 2, 3]), torch.tensor([10, 20, 30], dtype=torch.int64))
        cache.evict(3)

        # Take events - should include both store and remove events
        events = cache.take_events()
        self.assertGreater(len(events), 0)

        # Check event types
        event_types = [type(event).__name__ for event in events]
        self.assertIn("BlockStored", event_types)

        # Verify BlockRemoved event content
        remove_events = [e for e in events if isinstance(e, BlockRemoved)]
        for event in remove_events:
            self.assertGreater(len(event.block_hashes), 0)

    def test_extra_key_isolation(self):
        """Test that keys with different extra_key values are isolated."""
        cache = RadixCache(
            req_to_token_pool=None, token_to_kv_pool_allocator=None, page_size=1
        )

        # Insert same token sequence with different extra keys
        cache.insert(
            RadixKey([1, 2, 3], "key1"), torch.tensor([10, 20, 30], dtype=torch.int64)
        )
        cache.insert(
            RadixKey([1, 2, 3], "key2"), torch.tensor([40, 50, 60], dtype=torch.int64)
        )
        cache.insert(
            RadixKey([1, 2, 3], None), torch.tensor([70, 80, 90], dtype=torch.int64)
        )

        # Keys with different extra_key should not match each other
        result1 = cache.match_prefix(RadixKey([1, 2, 3], "key1"))
        result2 = cache.match_prefix(RadixKey([1, 2, 3], "key2"))
        result3 = cache.match_prefix(RadixKey([1, 2, 3], None))
        result4 = cache.match_prefix(RadixKey([1, 2, 3], "nonexistent"))

        # Each should match only its own data
        self.assertEqual(len(result1.device_indices), 3)
        torch.testing.assert_close(
            result1.device_indices, torch.tensor([10, 20, 30], dtype=torch.int64)
        )

        self.assertEqual(len(result2.device_indices), 3)
        torch.testing.assert_close(
            result2.device_indices, torch.tensor([40, 50, 60], dtype=torch.int64)
        )

        self.assertEqual(len(result3.device_indices), 3)
        torch.testing.assert_close(
            result3.device_indices, torch.tensor([70, 80, 90], dtype=torch.int64)
        )

        # Non-existent extra_key should not match
        self.assertEqual(len(result4.device_indices), 0)

    def test_lock_ref_operations(self):
        """Test lock reference counting operations."""
        cache = RadixCache(
            req_to_token_pool=None, token_to_kv_pool_allocator=None, page_size=1
        )

        # Insert sequence
        cache.insert(RadixKey([1, 2, 3]), torch.tensor([10, 20, 30], dtype=torch.int64))

        # Get node
        result = cache.match_prefix(RadixKey([1, 2, 3]))
        node = result.last_device_node

        initial_evictable = cache.evictable_size()
        initial_protected = cache.protected_size()

        # Lock the node
        cache.inc_lock_ref(node)
        self.assertEqual(cache.protected_size(), initial_protected + 3)
        self.assertEqual(cache.evictable_size(), initial_evictable - 3)

        # Unlock the node
        cache.dec_lock_ref(node)
        self.assertEqual(cache.protected_size(), initial_protected)
        self.assertEqual(cache.evictable_size(), initial_evictable)

    def test_evict_functionality(self):
        """Test eviction functionality."""
        mock_allocator = unittest.mock.Mock()
        mock_allocator.device = torch.device("cpu")

        cache = RadixCache(
            req_to_token_pool=None,
            token_to_kv_pool_allocator=mock_allocator,
            page_size=1,
        )

        # Insert sequences
        cache.insert(RadixKey([1, 2]), torch.tensor([10, 20], dtype=torch.int64))
        cache.insert(RadixKey([3, 4]), torch.tensor([30, 40], dtype=torch.int64))

        initial_size = cache.total_size()

        # Evict some tokens
        cache.evict(2)

        # Should have called free and reduced size
        mock_allocator.free.assert_called()
        self.assertLess(cache.total_size(), initial_size)

    def test_page_alignment_boundary(self):
        """Test page alignment with different sizes."""
        test_cases = [
            (1, 5),
            (2, 5),
            (4, 6),
        ]

        for page_size, sequence_length in test_cases:
            with self.subTest(page_size=page_size, sequence_length=sequence_length):
                cache = RadixCache(
                    req_to_token_pool=None,
                    token_to_kv_pool_allocator=None,
                    page_size=page_size,
                )

                tokens = list(range(sequence_length))
                cache.insert(RadixKey(tokens), torch.tensor(tokens, dtype=torch.int64))

                result = cache.match_prefix(RadixKey(tokens))
                self.assertGreater(len(result.device_indices), 0)

                # Match length should be page-aligned
                match_len = len(result.device_indices)
                self.assertEqual(match_len % page_size, 0)

    def test_pretty_print_basic(self):
        """Test pretty_print produces output."""
        cache = RadixCache(
            req_to_token_pool=None, token_to_kv_pool_allocator=None, page_size=1
        )

        cache.insert(RadixKey([1, 2, 3]), torch.tensor([10, 20, 30], dtype=torch.int64))

        # Just test that it doesn't crash
        try:
            cache.pretty_print()
        except Exception as e:
            self.fail(f"pretty_print raised an exception: {e}")

    def test_all_values_flatten(self):
        """Test all_values_flatten method."""
        cache = RadixCache(
            req_to_token_pool=None, token_to_kv_pool_allocator=None, page_size=1
        )

        cache.insert(RadixKey([1, 2]), torch.tensor([10, 20], dtype=torch.int64))
        cache.insert(RadixKey([3, 4]), torch.tensor([30, 40], dtype=torch.int64))

        all_values = cache.all_values_flatten()
        self.assertEqual(len(all_values), 4)
        # Values should contain all inserted values (order may vary)
        values_set = set(all_values.tolist())
        self.assertEqual(values_set, {10, 20, 30, 40})

    def test_advanced_prefix_match_with_node_splits(self):
        """Advanced prefix matching: splits inside nodes and across pages."""
        for page_size in [1, 2]:
            with self.subTest(page_size=page_size):
                cache = RadixCache(
                    req_to_token_pool=None,
                    token_to_kv_pool_allocator=None,
                    page_size=page_size,
                )

                # Insert a long sequence that will be split later.
                seq1 = [1, 2, 3, 4, 5, 6, 7, 8]
                val1 = torch.tensor([x * 10 for x in seq1], dtype=torch.int64)
                cache.insert(RadixKey(seq1), val1)

                # Insert a diverging branch to create an internal node on the path.
                seq2 = [1, 2, 9, 10]
                val2 = torch.tensor([x * 10 for x in seq2], dtype=torch.int64)
                cache.insert(RadixKey(seq2), val2)
                print(cache.pretty_print())

                baseline_total = cache.total_size()
                expected_total = 10  # 8 + 2
                self.assertEqual(baseline_total, expected_total)

                # Match that causes a split inside an existing node:
                # take first 4 tokens of seq1, then diverge.
                query1 = [1, 2, 3, 4, 999, 1000]
                result1 = cache.match_prefix(RadixKey(query1))
                torch.testing.assert_close(result1.device_indices, val1[:4])
                # No data change after structural split during matching.
                self.assertEqual(cache.total_size(), baseline_total)

                # Full match of the long sequence still returns the full indices.
                result_full = cache.match_prefix(RadixKey(seq1))
                torch.testing.assert_close(result_full.device_indices, val1)

                # Another split deeper on the path (after matching 6 tokens, then diverge).
                query2 = [1, 2, 3, 4, 5, 6, 777, 888]
                result2 = cache.match_prefix(RadixKey(query2))
                torch.testing.assert_close(result2.device_indices, val1[:6])
                self.assertEqual(cache.total_size(), baseline_total)

                # Matching the short diverging branch should return exactly its indices.
                result_branch = cache.match_prefix(RadixKey(seq2))
                torch.testing.assert_close(result_branch.device_indices, val2)


# Allow running this file directly as a script; unittest.main() discovers and
# runs the TestCase classes defined in this module.
if __name__ == "__main__":
    unittest.main()
Refactors radix cache for extra key support (#10317) Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com> 2025-09-21 11:16:16 -07:00			`"""`
			`Unit tests for the RadixCache implementation.`

			`This module tests the core functionality of RadixCache, RadixKey, and TreeNode`
			`following SGLang testing patterns.`

			`Test Coverage:`
			`- RadixKey: token ID management, slicing, iteration, representation`
			`- TreeNode: node properties, reference counting, hash values`
			`- RadixCache: insert/match operations, eviction, page alignment, error handling`
			`- Cache events and request handling`
			`- Boundary conditions with parameterized testing`

			`Usage:`
			`python test_radix_cache_unit.py`
			`python -m pytest test_radix_cache_unit.py -v`
			`python -m pytest test_radix_cache_unit.py::TestRadixCache::test_insert_basic`
			`"""`

			`import time`
			`import unittest`
			`import unittest.mock`

			`import torch`

			`from sglang.srt.disaggregation.kv_events import BlockRemoved, BlockStored`
			`from sglang.srt.mem_cache.radix_cache import RadixCache, RadixKey, TreeNode`

			`# Test constants`
			`DEFAULT_PAGE_SIZE = 4`


			`class TestRadixKey(unittest.TestCase):`
			`"""Test cases for RadixKey class."""`

			`def test_init_basic(self):`
			`"""Test basic initialization of RadixKey."""`
			`token_ids = [1, 2, 3, 4]`
			`key = RadixKey(token_ids)`
			`self.assertEqual(key.token_ids, token_ids)`
			`self.assertIsNone(key.extra_key)`

			`def test_init_with_extra_key(self):`
			`"""Test initialization with extra_key."""`
			`token_ids = [1, 2, 3]`
			`extra_key = "test_key"`
			`key = RadixKey(token_ids, extra_key)`
			`self.assertEqual(key.token_ids, token_ids)`
			`self.assertEqual(key.extra_key, extra_key)`

			`def test_len(self):`
			`"""Test __len__ method."""`
			`key = RadixKey([1, 2, 3])`
			`self.assertEqual(len(key), 3)`

			`empty_key = RadixKey([])`
			`self.assertEqual(len(empty_key), 0)`

			`def test_iter(self):`
			`"""Test __iter__ method."""`
			`token_ids = [1, 2, 3, 4]`
			`key = RadixKey(token_ids)`
			`self.assertEqual(list(key), token_ids)`

			`def test_len_and_iter(self):`
			`"""Test __len__ and __iter__ methods."""`
			`test_cases = [`
			`([1, 2, 3], 3),`
			`([], 0),`
			`([42], 1),`
			`]`

			`for tokens, expected in test_cases:`
			`with self.subTest(tokens=tokens):`
			`key = RadixKey(tokens)`
			`self.assertEqual(len(key), expected)`
			`self.assertEqual(list(key), tokens)`

			`def test_getitem_int(self):`
			`"""Test __getitem__ with int index."""`
			`test_cases = [`
			`([10, 20, 30], 0, [10]),`
			`([10, 20, 30], -1, [30]),`
			`([10, 20, 30], 2, [30]),`
			`]`

			`for tokens, index, expected in test_cases:`
			`with self.subTest(tokens=tokens, index=index):`
			`key = RadixKey(tokens)`
			`result = key[index]`
			`self.assertIsInstance(result, RadixKey)`
			`self.assertEqual(result.token_ids, expected)`

			`def test_getitem_slice(self):`
			`"""Test __getitem__ with slice and edge cases."""`
			`key = RadixKey([1, 2, 3, 4, 5], "extra")`

			`# Basic slice`
			`sliced = key[1:4]`
			`self.assertIsInstance(sliced, RadixKey)`
			`self.assertEqual(sliced.token_ids, [2, 3, 4])`
			`self.assertEqual(sliced.extra_key, "extra")`

			`# Edge cases`
			`self.assertEqual(key[2:2].token_ids, []) # Empty slice`
			`self.assertEqual(key[:].token_ids, [1, 2, 3, 4, 5]) # Full slice`

			`def test_getitem_invalid_index(self):`
			`"""Test __getitem__ with invalid indices."""`
			`key = RadixKey([1, 2, 3])`
			`with self.assertRaises(IndexError):`
			`_ = key[10] # Out of bounds`

			`def test_repr(self):`
			`"""Test __repr__ method."""`
			`key = RadixKey([1, 2, 3], "test")`
			`repr_str = repr(key)`
			`self.assertIn("RadixKey", repr_str)`
			`self.assertIn("extra_key='test'", repr_str)`
			`self.assertIn("[1, 2, 3]", repr_str)`

			`def test_repr_long_token_ids(self):`
			`"""Test __repr__ with long token_ids."""`
			`long_tokens = list(range(15))`
			`key = RadixKey(long_tokens)`
			`repr_str = repr(key)`
			`self.assertIn("...", repr_str) # Should be truncated`


			`class TestTreeNode(unittest.TestCase):`
			`"""Test cases for TreeNode class."""`

			`def setUp(self):`
			`"""Reset the counter before each test."""`
			`TreeNode.counter = 0`

			`def test_init_basic(self):`
			`"""Test basic initialization of TreeNode."""`
			`node = TreeNode()`
			`self.assertEqual(node.id, 0)`
			`self.assertEqual(len(node.children), 0)`
			`self.assertIsNone(node.parent)`
			`self.assertIsNone(node.key)`
			`self.assertIsNone(node.value)`
			`self.assertEqual(node.lock_ref, 0)`
			`self.assertEqual(node.hit_count, 0)`
			`self.assertEqual(node.host_ref_counter, 0)`
			`self.assertIsNone(node.host_value)`
			`self.assertIsNone(node.hash_value)`

			`def test_init_with_id(self):`
			`"""Test initialization with custom ID."""`
			`node = TreeNode(id=42)`
			`self.assertEqual(node.id, 42)`
			`node2 = TreeNode()`
			`self.assertEqual(node2.id, 1) # Counter was incremented`

			`def test_counter_increment(self):`
			`"""Test that counter increments properly."""`
			`node1 = TreeNode()`
			`node2 = TreeNode()`
			`self.assertEqual(node1.id, 0)`
			`self.assertEqual(node2.id, 1)`

			`def test_evicted_backuped_properties(self):`
			`"""Test evicted and backuped properties."""`
			`test_cases = [`
			`(False, False, True, False),`
			`(True, False, False, False),`
			`(True, True, False, True),`
			`(False, True, True, True),`
			`]`

			`for (`
			`has_value,`
			`has_host_value,`
			`expected_evicted,`
			`expected_backuped,`
			`) in test_cases:`
			`with self.subTest(has_value=has_value, has_host_value=has_host_value):`
			`node = TreeNode()`

			`if has_value:`
			`node.value = torch.tensor([1, 2, 3])`
			`if has_host_value:`
			`node.host_value = torch.tensor([4, 5, 6])`

			`self.assertEqual(node.evicted, expected_evicted)`
			`self.assertEqual(node.backuped, expected_backuped)`

			`def test_protect_release_host(self):`
			`"""Test protect_host and release_host methods."""`
			`node = TreeNode()`
			`self.assertEqual(node.host_ref_counter, 0)`

			`node.protect_host()`
			`self.assertEqual(node.host_ref_counter, 1)`

			`node.release_host()`
			`self.assertEqual(node.host_ref_counter, 0)`

			`# Test error case`
			`with self.assertRaises(RuntimeError):`
			`node.release_host()`

			`def test_get_last_hash_value(self):`
			`"""Test get_last_hash_value method."""`
			`node = TreeNode()`
			`self.assertIsNone(node.get_last_hash_value())`

			`node.hash_value = ["hash1", "hash2", "hash3"]`
			`self.assertEqual(node.get_last_hash_value(), "hash3")`

			`def test_lt_comparison(self):`
			`"""Test less than comparison based on last_access_time."""`
			`node1 = TreeNode()`
			`time.sleep(0.001) # Small delay to ensure different timestamps`
			`node2 = TreeNode()`

			`self.assertTrue(node1 < node2)`
			`self.assertFalse(node2 < node1)`


			class TestRadixCache(unittest.TestCase):
			    """Test cases for RadixCache class.

			    Every cache is built without a request-to-token pool. A KV pool
			    allocator is only supplied (as a mock) by the tests that call
			    ``evict``; all other tests pass ``None``.
			    """

			    @staticmethod
			    def _make_cache(page_size=1, token_to_kv_pool_allocator=None, **kwargs):
			        """Build a RadixCache with no request pool and the given options.

			        Args:
			            page_size: Page size forwarded to RadixCache.
			            token_to_kv_pool_allocator: Allocator forwarded to RadixCache.
			            **kwargs: Any further RadixCache keyword arguments
			                (e.g. ``disable``, ``is_eagle``, ``enable_kv_cache_events``).

			        Returns:
			            The newly constructed RadixCache.
			        """
			        return RadixCache(
			            req_to_token_pool=None,
			            token_to_kv_pool_allocator=token_to_kv_pool_allocator,
			            page_size=page_size,
			            **kwargs,
			        )

			    @staticmethod
			    def _make_mock_allocator():
			        """Return a mock KV pool allocator whose device is the CPU."""
			        allocator = unittest.mock.Mock()
			        allocator.device = torch.device("cpu")
			        return allocator

			    def setUp(self):
			        """Set up test fixtures."""
			        # Reset the class-level node counter before every test.
			        TreeNode.counter = 0

			    def test_init_variations(self):
			        """Test cache initialization with different parameters."""
			        # Each case is (page_size, disable, enable_kv_cache_events).
			        test_cases = [
			            (1, False, False),
			            (4, False, True),
			            (1, True, False),
			        ]

			        for page_size, disable, enable_events in test_cases:
			            with self.subTest(
			                page_size=page_size, disable=disable, enable_events=enable_events
			            ):
			                cache = self._make_cache(
			                    page_size=page_size,
			                    disable=disable,
			                    enable_kv_cache_events=enable_events,
			                )

			                self.assertEqual(cache.page_size, page_size)
			                self.assertEqual(cache.disable, disable)
			                self.assertEqual(cache.enable_kv_cache_events, enable_events)
			                self.assertEqual(cache.device, torch.device("cpu"))
			                self.assertIsNotNone(cache.root_node)
			                self.assertEqual(len(cache.root_node.key), 0)

			    def test_reset(self):
			        """Test reset method."""
			        cache = self._make_cache(page_size=1)

			        # Insert some data
			        cache.insert(RadixKey([1, 2, 3]), torch.tensor([10, 20, 30], dtype=torch.int64))
			        self.assertGreater(cache.total_size(), 0)

			        # Reset
			        cache.reset()
			        self.assertEqual(cache.total_size(), 0)
			        self.assertEqual(cache.evictable_size(), 0)
			        self.assertEqual(cache.protected_size(), 0)

			    def test_insert_and_match_basic(self):
			        """Test basic insert and match operations."""
			        for disable_cache in [False, True]:
			            with self.subTest(disable_cache=disable_cache):
			                cache = self._make_cache(page_size=1, disable=disable_cache)

			                key = RadixKey([1, 2, 3])
			                value = torch.tensor([10, 20, 30], dtype=torch.int64)
			                prefix_len = cache.insert(key, value)

			                if disable_cache:
			                    # A disabled cache stores nothing.
			                    self.assertEqual(prefix_len, 0)
			                    self.assertEqual(cache.total_size(), 0)
			                    continue

			                self.assertEqual(prefix_len, 0)  # No existing prefix
			                self.assertEqual(cache.total_size(), 3)
			                self.assertEqual(cache.evictable_size(), 3)

			                # Test match_prefix
			                result = cache.match_prefix(RadixKey([1, 2, 3]))
			                self.assertEqual(len(result.device_indices), 3)
			                torch.testing.assert_close(result.device_indices, value)

			                # Test partial match
			                result = cache.match_prefix(RadixKey([1, 2]))
			                self.assertEqual(len(result.device_indices), 2)
			                torch.testing.assert_close(
			                    result.device_indices, torch.tensor([10, 20], dtype=torch.int64)
			                )

			    def test_insert_and_match_eagle(self):
			        """Test insert and match operations for EAGLE."""
			        cache = self._make_cache(page_size=1, disable=False, is_eagle=True)

			        key = RadixKey([1, 2, 3, 4])
			        value = torch.tensor([10, 20, 30, 40], dtype=torch.int64)
			        prefix_len = cache.insert(key, value)

			        self.assertEqual(prefix_len, 0)  # No existing prefix
			        self.assertEqual(
			            cache.total_size(), 3
			        )  # The last token is ignored in bigram key
			        self.assertEqual(cache.evictable_size(), 3)

			        # Test match_prefix
			        result = cache.match_prefix(RadixKey([1, 2, 3, 4]))
			        self.assertEqual(len(result.device_indices), 3)
			        torch.testing.assert_close(
			            result.device_indices, torch.tensor([10, 20, 30], dtype=torch.int64)
			        )

			        # Test partial match
			        result = cache.match_prefix(RadixKey([1, 2]))
			        self.assertEqual(len(result.device_indices), 1)
			        torch.testing.assert_close(
			            result.device_indices, torch.tensor([10], dtype=torch.int64)
			        )

			    def test_insert_and_match_eagle_page_size(self):
			        """Test insert and match operations for EAGLE and page_size > 1."""
			        cache = self._make_cache(page_size=2, disable=False, is_eagle=True)

			        key = RadixKey([1, 2, 3])
			        value = torch.tensor([10, 20, 30], dtype=torch.int64)
			        prefix_len = cache.insert(key, value)

			        self.assertEqual(prefix_len, 0)  # No existing prefix
			        self.assertEqual(cache.total_size(), 2)  # only one page is inserted
			        self.assertEqual(cache.evictable_size(), 2)

			        # Test match_prefix
			        result = cache.match_prefix(RadixKey([1, 2, 3, 4]))
			        self.assertEqual(len(result.device_indices), 2)
			        torch.testing.assert_close(
			            result.device_indices, torch.tensor([10, 20], dtype=torch.int64)
			        )

			        # Test unmatched
			        result = cache.match_prefix(RadixKey([1, 2]))
			        self.assertEqual(len(result.device_indices), 0)
			        torch.testing.assert_close(
			            result.device_indices, torch.tensor([], dtype=torch.int64)
			        )

			    def test_insert_with_none_value(self):
			        """Test insert with None value (should use token_ids as list)."""
			        cache = self._make_cache(page_size=1)

			        key = RadixKey([1, 2, 3])
			        prefix_len = cache.insert(key, None)

			        # When None is passed, it should create value from token_ids
			        self.assertEqual(prefix_len, 0)
			        self.assertEqual(cache.total_size(), 3)

			    def test_total_size(self):
			        """Test total_size calculation."""
			        cache = self._make_cache(page_size=1)

			        self.assertEqual(cache.total_size(), 0)

			        cache.insert(RadixKey([1, 2, 3]), torch.tensor([10, 20, 30], dtype=torch.int64))
			        self.assertEqual(cache.total_size(), 3)

			        cache.insert(RadixKey([4, 5]), torch.tensor([40, 50], dtype=torch.int64))
			        self.assertEqual(cache.total_size(), 5)

			    def test_kv_cache_events(self):
			        """Test KV cache events functionality."""
			        # Each case is (page_size, enable_kv_cache_events).
			        test_cases = [
			            (1, True),
			            (2, True),
			            (1, False),
			        ]

			        for page_size, enable_events in test_cases:
			            with self.subTest(page_size=page_size, enable_events=enable_events):
			                cache = self._make_cache(
			                    page_size=page_size,
			                    enable_kv_cache_events=enable_events,
			                )

			                # Insert data
			                cache.insert(RadixKey([1, 2, 3, 4, 5]), None)

			                # Take events
			                events = cache.take_events()

			                if enable_events:
			                    self.assertGreater(len(events), 0)
			                    # Verify events include BlockStored events (there might be other event types)
			                    block_stored_events = [
			                        e for e in events if isinstance(e, BlockStored)
			                    ]
			                    self.assertGreater(len(block_stored_events), 0)
			                    for event in block_stored_events:
			                        self.assertLessEqual(len(event.token_ids), page_size)
			                else:
			                    self.assertEqual(len(events), 0)

			    def test_kv_cache_events_with_eviction(self):
			        """Test KV cache events include removal events."""
			        cache = self._make_cache(
			            page_size=1,
			            token_to_kv_pool_allocator=self._make_mock_allocator(),
			            enable_kv_cache_events=True,
			        )

			        # Insert and then evict data
			        cache.insert(RadixKey([1, 2, 3]), torch.tensor([10, 20, 30], dtype=torch.int64))
			        cache.evict(3)

			        # Take events - should include both store and remove events
			        events = cache.take_events()
			        stored_events = [e for e in events if isinstance(e, BlockStored)]
			        removed_events = [e for e in events if isinstance(e, BlockRemoved)]

			        self.assertGreater(len(stored_events), 0)
			        # Require at least one removal so the content check below cannot
			        # pass vacuously on an empty list.
			        self.assertGreater(len(removed_events), 0)

			        # Verify BlockRemoved event content
			        for event in removed_events:
			            self.assertGreater(len(event.block_hashes), 0)

			    def test_extra_key_isolation(self):
			        """Test that keys with different extra_key values are isolated."""
			        cache = self._make_cache(page_size=1)

			        # Insert same token sequence with different extra keys
			        cache.insert(
			            RadixKey([1, 2, 3], "key1"), torch.tensor([10, 20, 30], dtype=torch.int64)
			        )
			        cache.insert(
			            RadixKey([1, 2, 3], "key2"), torch.tensor([40, 50, 60], dtype=torch.int64)
			        )
			        cache.insert(
			            RadixKey([1, 2, 3], None), torch.tensor([70, 80, 90], dtype=torch.int64)
			        )

			        # Keys with different extra_key should not match each other
			        result1 = cache.match_prefix(RadixKey([1, 2, 3], "key1"))
			        result2 = cache.match_prefix(RadixKey([1, 2, 3], "key2"))
			        result3 = cache.match_prefix(RadixKey([1, 2, 3], None))
			        result4 = cache.match_prefix(RadixKey([1, 2, 3], "nonexistent"))

			        # Each should match only its own data
			        self.assertEqual(len(result1.device_indices), 3)
			        torch.testing.assert_close(
			            result1.device_indices, torch.tensor([10, 20, 30], dtype=torch.int64)
			        )

			        self.assertEqual(len(result2.device_indices), 3)
			        torch.testing.assert_close(
			            result2.device_indices, torch.tensor([40, 50, 60], dtype=torch.int64)
			        )

			        self.assertEqual(len(result3.device_indices), 3)
			        torch.testing.assert_close(
			            result3.device_indices, torch.tensor([70, 80, 90], dtype=torch.int64)
			        )

			        # Non-existent extra_key should not match
			        self.assertEqual(len(result4.device_indices), 0)

			    def test_lock_ref_operations(self):
			        """Test lock reference counting operations."""
			        cache = self._make_cache(page_size=1)

			        # Insert sequence
			        cache.insert(RadixKey([1, 2, 3]), torch.tensor([10, 20, 30], dtype=torch.int64))

			        # Get node
			        result = cache.match_prefix(RadixKey([1, 2, 3]))
			        node = result.last_device_node

			        initial_evictable = cache.evictable_size()
			        initial_protected = cache.protected_size()

			        # Lock the node: its 3 tokens move from evictable to protected
			        cache.inc_lock_ref(node)
			        self.assertEqual(cache.protected_size(), initial_protected + 3)
			        self.assertEqual(cache.evictable_size(), initial_evictable - 3)

			        # Unlock the node: both sizes return to their starting values
			        cache.dec_lock_ref(node)
			        self.assertEqual(cache.protected_size(), initial_protected)
			        self.assertEqual(cache.evictable_size(), initial_evictable)

			    def test_evict_functionality(self):
			        """Test eviction functionality."""
			        mock_allocator = self._make_mock_allocator()
			        cache = self._make_cache(
			            page_size=1, token_to_kv_pool_allocator=mock_allocator
			        )

			        # Insert sequences
			        cache.insert(RadixKey([1, 2]), torch.tensor([10, 20], dtype=torch.int64))
			        cache.insert(RadixKey([3, 4]), torch.tensor([30, 40], dtype=torch.int64))

			        initial_size = cache.total_size()

			        # Evict some tokens
			        cache.evict(2)

			        # Should have called free and reduced size
			        mock_allocator.free.assert_called()
			        self.assertLess(cache.total_size(), initial_size)

			    def test_page_alignment_boundary(self):
			        """Test page alignment with different sizes."""
			        # Each case is (page_size, sequence_length).
			        test_cases = [
			            (1, 5),
			            (2, 5),
			            (4, 6),
			        ]

			        for page_size, sequence_length in test_cases:
			            with self.subTest(page_size=page_size, sequence_length=sequence_length):
			                cache = self._make_cache(page_size=page_size)

			                tokens = list(range(sequence_length))
			                cache.insert(RadixKey(tokens), torch.tensor(tokens, dtype=torch.int64))

			                result = cache.match_prefix(RadixKey(tokens))
			                self.assertGreater(len(result.device_indices), 0)

			                # Match length should be page-aligned
			                match_len = len(result.device_indices)
			                self.assertEqual(match_len % page_size, 0)

			    def test_pretty_print_basic(self):
			        """Test pretty_print produces output."""
			        cache = self._make_cache(page_size=1)

			        cache.insert(RadixKey([1, 2, 3]), torch.tensor([10, 20, 30], dtype=torch.int64))

			        # Just test that it doesn't crash
			        try:
			            cache.pretty_print()
			        except Exception as e:
			            self.fail(f"pretty_print raised an exception: {e}")

			    def test_all_values_flatten(self):
			        """Test all_values_flatten method."""
			        cache = self._make_cache(page_size=1)

			        cache.insert(RadixKey([1, 2]), torch.tensor([10, 20], dtype=torch.int64))
			        cache.insert(RadixKey([3, 4]), torch.tensor([30, 40], dtype=torch.int64))

			        all_values = cache.all_values_flatten()
			        self.assertEqual(len(all_values), 4)
			        # Values should contain all inserted values (order may vary)
			        values_set = set(all_values.tolist())
			        self.assertEqual(values_set, {10, 20, 30, 40})

			    def test_advanced_prefix_match_with_node_splits(self):
			        """Advanced prefix matching: splits inside nodes and across pages."""
			        for page_size in [1, 2]:
			            with self.subTest(page_size=page_size):
			                cache = self._make_cache(page_size=page_size)

			                # Insert a long sequence that will be split later.
			                seq1 = [1, 2, 3, 4, 5, 6, 7, 8]
			                val1 = torch.tensor([x * 10 for x in seq1], dtype=torch.int64)
			                cache.insert(RadixKey(seq1), val1)

			                # Insert a diverging branch to create an internal node on the path.
			                seq2 = [1, 2, 9, 10]
			                val2 = torch.tensor([x * 10 for x in seq2], dtype=torch.int64)
			                cache.insert(RadixKey(seq2), val2)

			                baseline_total = cache.total_size()
			                expected_total = 10  # 8 + 2
			                self.assertEqual(baseline_total, expected_total)

			                # Match that causes a split inside an existing node:
			                # take first 4 tokens of seq1, then diverge.
			                query1 = [1, 2, 3, 4, 999, 1000]
			                result1 = cache.match_prefix(RadixKey(query1))
			                torch.testing.assert_close(result1.device_indices, val1[:4])
			                # No data change after structural split during matching.
			                self.assertEqual(cache.total_size(), baseline_total)

			                # Full match of the long sequence still returns the full indices.
			                result_full = cache.match_prefix(RadixKey(seq1))
			                torch.testing.assert_close(result_full.device_indices, val1)

			                # Another split deeper on the path (after matching 6 tokens, then diverge).
			                query2 = [1, 2, 3, 4, 5, 6, 777, 888]
			                result2 = cache.match_prefix(RadixKey(query2))
			                torch.testing.assert_close(result2.device_indices, val1[:6])
			                self.assertEqual(cache.total_size(), baseline_total)

			                # Matching the short diverging branch should return exactly its indices.
			                result_branch = cache.match_prefix(RadixKey(seq2))
			                torch.testing.assert_close(result_branch.device_indices, val2)


			# Run the unittest runner only when this file is executed as a script.
			if __name__ == "__main__":
			    unittest.main()