Fix a draft-model accuracy bug in EAGLE; support step=1; return logprob in EAGLE (#4134)

Co-authored-by: Sehoon Kim <kssteven418@gmail.com>
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
Co-authored-by: Sehoon Kim <sehoon@x.ai>
This commit is contained in:
Lianmin Zheng
2025-03-06 06:13:59 -08:00
committed by GitHub
parent 3a3918121f
commit bc1534ff32
11 changed files with 304 additions and 106 deletions

View File

@@ -143,11 +143,11 @@ class TestGPTQModelDynamic(unittest.TestCase):
print(f"result = `{result}`")
assert "paris" in result["text"].lower()
self.assertIn("paris", result["text"].lower())
throughput = max_tokens / (tok - tic)
print(f"Throughput: {throughput} tokens/s")
assert throughput >= 140
self.assertGreaterEqual(throughput, 140)
def test_gptq_module(self):
check_quant_method(self.MODEL_PATH, use_marlin_kernel=False)