Add a unit test for EAGLE 2 with CUDA graph disabled (#3412)
This commit is contained in:
@@ -30,16 +30,34 @@ class TestEAGLEEngine(unittest.TestCase):
|
||||
ref_output = ref_engine.generate(prompt, sampling_params)["text"]
|
||||
ref_engine.shutdown()
|
||||
|
||||
# Test cases with different configurations
|
||||
configs = [
|
||||
# Original config
|
||||
{
|
||||
"model_path": DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
||||
"speculative_draft_model_path": DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
||||
"speculative_algorithm": "EAGLE",
|
||||
"speculative_num_steps": 5,
|
||||
"speculative_eagle_topk": 8,
|
||||
"speculative_num_draft_tokens": 64,
|
||||
"mem_fraction_static": 0.7,
|
||||
},
|
||||
# Config with CUDA graph disabled
|
||||
{
|
||||
"model_path": DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
||||
"speculative_draft_model_path": DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
||||
"speculative_algorithm": "EAGLE",
|
||||
"speculative_num_steps": 5,
|
||||
"speculative_eagle_topk": 8,
|
||||
"speculative_num_draft_tokens": 64,
|
||||
"mem_fraction_static": 0.7,
|
||||
"disable_cuda_graph": True,
|
||||
},
|
||||
]
|
||||
|
||||
for config in configs:
|
||||
# Launch EAGLE engine
|
||||
engine = sgl.Engine(
|
||||
model_path=DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST,
|
||||
speculative_draft_model_path=DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
|
||||
speculative_algorithm="EAGLE",
|
||||
speculative_num_steps=5,
|
||||
speculative_eagle_topk=8,
|
||||
speculative_num_draft_tokens=64,
|
||||
mem_fraction_static=0.7,
|
||||
)
|
||||
engine = sgl.Engine(**config)
|
||||
|
||||
# Case 1: Test the output of EAGLE engine is the same as normal engine
|
||||
out1 = engine.generate(prompt, sampling_params)["text"]
|
||||
|
||||
Reference in New Issue
Block a user