Fix the overhead due to penalizer in bench_latency (#1496)

2024-09-23 07:38:14 -07:00
parent 42a2d82ba7
commit 2854a5ea9f
6 changed files with 9 additions and 16 deletions
--- a/scripts/playground/reference_hf.py
+++ b/scripts/playground/reference_hf.py
@@ -45,7 +45,7 @@ def normal_text(args):
        "The capital of the United Kindom is",
        "Today is a sunny day and I like",
    ]
-    max_new_tokens = 17
+    max_new_tokens = 16

    torch.cuda.set_device(0)