Update benchmark scripts (#8)

2024-01-15 16:12:57 -08:00
parent 01ca82d765
commit 70359bf31a
28 changed files with 183 additions and 50 deletions
--- a/benchmark/react/README.md
+++ b/benchmark/react/README.md
@@ -1,5 +1,7 @@
 ## Run benchmark

+NOTE: This benchmark replays a pre-recorded trace to measure throughput and latency. It is not an actual ReAct agent implementation.
+
 ### Benchmark sglang
 ```
 python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
--- a/benchmark/react/bench_other.py
+++ b/benchmark/react/bench_other.py
@@ -124,6 +124,9 @@ def main(args):
            ))
            return out["result"]

+        # warmup
+        call_generate("Hello,", 1.0, 8, ".")
+
    else:
        raise ValueError(f"Invalid backend: {args.backend}")

--- a/benchmark/react/bench_sglang.py
+++ b/benchmark/react/bench_sglang.py
@@ -82,9 +82,10 @@ Action 3: Finish[yes]
 """ + question)
    for i in range(1, len(triplets) + 2):
        s += "Thought " + str(i) + ":"
+        # NOTE: This loop replays a pre-recorded trace for benchmarking purposes; it is not an actual ReAct agent implementation.
        ss = s.fork(1)
        ss[0] += sgl.gen(name="thought_action", max_tokens=200, stop="Observation")
-        # ss.join()
+        ss.join()
        # to verify the correctness of output, this should be collected
        # print(ss[0]["thought_action"])
        if i > len(triplets):
--- a/benchmark/react/hotpotqa_100.jsonl
+++ b/benchmark/react/hotpotqa_100.jsonl