Misc fixes for eagle (flush_cache, CPU overhead) (#3014)

This commit is contained in:
Lianmin Zheng
2025-01-20 20:25:13 -08:00
parent d2571dd5c7
commit 287d07a669
11 changed files with 133 additions and 96 deletions

View File

@@ -535,7 +535,8 @@ def test_hellaswag_select():
# Compute accuracy
accuracy_gen = np.mean(np.array(preds_gen) == np.array(labels))
assert np.abs(accuracy_gen - accuracy) < 0.1
print(f"{accuracy=}, {accuracy_gen=}")
assert np.abs(accuracy_gen - accuracy) < 0.05
assert np.abs(latency_gen - latency) < 1
return accuracy, latency

View File

@@ -567,15 +567,16 @@ def run_bench_serving(
random_range_ratio=0.0,
request_rate=request_rate,
multi=None,
seed=0,
output_file=None,
disable_tqdm=False,
disable_stream=disable_stream,
disable_ignore_eos=False,
return_logprob=False,
lora_name=None,
seed=0,
disable_ignore_eos=False,
extra_request_body=None,
apply_chat_template=False,
profile=None,
lora_name=None,
)
try: