Add torchao quant (int4/int8/fp8) to llama models (#1341)

Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
This commit is contained in:
Jerry Zhang
2024-09-09 05:32:41 -07:00
committed by GitHub
parent e4d68afcf0
commit a7c47e0f02
10 changed files with 151 additions and 12 deletions

View File

@@ -29,12 +29,12 @@ class TestEvalAccuracyMini(unittest.TestCase):
base_url=self.base_url,
model=self.model,
eval_name="mmlu",
-num_examples=32,
+num_examples=64,
num_threads=32,
)
metrics = run_eval(args)
-assert metrics["score"] >= 0.6
+assert metrics["score"] >= 0.65
if __name__ == "__main__":