Update benchmark scripts (#8)

This commit is contained in:
Lianmin Zheng
2024-01-15 16:12:57 -08:00
committed by GitHub
parent 01ca82d765
commit 70359bf31a
28 changed files with 183 additions and 50 deletions

View File

@@ -1,5 +1,7 @@
## Run benchmark
NOTE: This implementation replays a given trace to benchmark throughput and latency; it is not an actual ReAct agent implementation.
### Benchmark sglang
```
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000

View File

@@ -124,6 +124,9 @@ def main(args):
))
return out["result"]
# warmup
call_generate("Hello,", 1.0, 8, ".")
else:
raise ValueError(f"Invalid backend: {args.backend}")

View File

@@ -82,9 +82,10 @@ Action 3: Finish[yes]
""" + question)
for i in range(1, len(triplets) + 2):
s += "Thought " + str(i) + ":"
# NOTE: This is an implementation for replaying a given trace for benchmark purposes. It is not an actual ReAct agent implementation.
ss = s.fork(1)
ss[0] += sgl.gen(name="thought_action", max_tokens=200, stop="Observation")
# ss.join()
ss.join()
# to verify the correctness of output, this should be collected
# print(ss[0]["thought_action"])
if i > len(triplets):

File diff suppressed because one or more lines are too long