Fix a draft model accuracy bug in eagle; support step=1; return logprob in eagle (#4134)
Co-authored-by: Sehoon Kim <kssteven418@gmail.com>
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
Co-authored-by: Sehoon Kim <sehoon@x.ai>
This commit is contained in:
@@ -143,11 +143,11 @@ class TestGPTQModelDynamic(unittest.TestCase):
|
||||
|
||||
print(f"result = `{result}`")
|
||||
|
||||
-assert "paris" in result["text"].lower()
+self.assertIn("paris", result["text"].lower())
|
||||
|
||||
throughput = max_tokens / (tok - tic)
|
||||
print(f"Throughput: {throughput} tokens/s")
|
||||
-assert throughput >= 140
+self.assertGreaterEqual(throughput, 140)
|
||||
|
||||
def test_gptq_module(self):
|
||||
check_quant_method(self.MODEL_PATH, use_marlin_kernel=False)
|
||||
|
||||
Reference in New Issue
Block a user