[Eagle] Refactor eagle speculative decoding (#3986)

Co-authored-by: Ke Bao <ISPObaoke@163.com>
This commit is contained in:
Ying Sheng
2025-03-05 08:06:07 -08:00
committed by GitHub
parent 5be8f1ed98
commit d3d4d76758
22 changed files with 670 additions and 352 deletions

View File

@@ -22,7 +22,7 @@ from typing import List, Optional
import torch
-from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool, MLATokenToKVPoolHost
+from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool, MHATokenToKVPoolHost
logger = logging.getLogger(__name__)
@@ -128,7 +128,7 @@ class HiCacheController:
def __init__(
self,
mem_pool_device: MHATokenToKVPool,
-mem_pool_host: MLATokenToKVPoolHost,
+mem_pool_host: MHATokenToKVPoolHost,
write_policy: str = "write_through_selective",
):