Multi-Stage Awake: Support Resume and Pause KV Cache and Weights separately (#7099)

This commit is contained in:
Stefan He
2025-06-19 00:56:37 -07:00
committed by GitHub
parent 9179ea1595
commit 3774f07825
14 changed files with 297 additions and 108 deletions

View File

@@ -37,6 +37,7 @@ from sglang.utils import get_exception_traceback
# General test models
DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.1-8B-Instruct"
DEFAULT_SMALL_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.2-1B-Instruct"
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_BASE = "meta-llama/Llama-3.2-1B"
DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST = "Qwen/Qwen1.5-MoE-A2.7B"