Return a per-request metric for the number of cached_tokens read (#1599)

2024-10-16 20:49:22 +02:00
parent dbec2f1847
commit ecb8bad276
7 changed files with 245 additions and 3 deletions
--- a/python/sglang/srt/managers/schedule_policy.py
+++ b/python/sglang/srt/managers/schedule_policy.py
@@ -51,6 +51,7 @@ class SchedulePolicy:
                r.prefix_indices, r.last_node = self.tree_cache.match_prefix(
                    rid=r.rid, key=r.adjust_max_prefix_ids()
                )
+
            prefix_computed = True

        if self.policy == "lpm":