release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>
This commit is contained in:
Lianmin Zheng
2024-01-08 04:37:50 +00:00
parent f6d40df0ee
commit 22085081bb
145 changed files with 17802 additions and 2 deletions

View File

@@ -0,0 +1,26 @@
## Run benchmark
Ensure that this benchmark is run in a serial manner (using --parallel 1) to preserve any potential dependencies between requests.
### Benchmark sglang
```
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
```
```
python3 bench_sglang.py --num-events 1000 --parallel 1
```
### Benchmark vllm
```
python3 -m vllm.entrypoints.api_server --tokenizer-mode auto --model meta-llama/Llama-2-7b-chat-hf --disable-log-requests --port 21000
```
```
python3 bench_other.py --num-events 1000 --backend vllm --parallel 1
```
### Benchmark guidance
```
python3 bench_other.py --num-events 1000 --backend guidance --parallel 1
```

View File

@@ -0,0 +1,231 @@
import sglang as sgl
# here are the top five agent functions contributing ~70% LLM calls
# reference: https://github.com/joonspk-research/generative_agents/
@sgl.function
def poignancy_event(s, persona_name, persona_iss, event):
s += "Here is a brief description of " + persona_name + ".\n"
s += persona_iss + "\n"
s += "On the scale of 1 to 10, where 1 is purely mundane (e.g., brushing teeth, making bed) and 10 is extremely poignant (e.g., a break up, college acceptance), rate the likely poignancy of the following event for"
s += persona_name + ".\n\n"
s += "Event: " + event
s += "Rate (return a number between 1 to 10):"
s += sgl.gen(name="Rate", max_tokens=2)
def poignancy_event_prompt(persona_name, persona_iss, event):
# return prompt and max_tokens
s = ""
s += "Here is a brief description of " + persona_name + ".\n"
s += persona_iss + "\n"
s += "On the scale of 1 to 10, where 1 is purely mundane (e.g., brushing teeth, making bed) and 10 is extremely poignant (e.g., a break up, college acceptance), rate the likely poignancy of the following event for"
s += persona_name + ".\n\n"
s += "Event: " + event
s += "Rate (return a number between 1 to 10):"
return {"prompt": s, "max_tokens": 2, "stop": None}
@sgl.function
def generate_event_triple(s, persona_name, action):
s += """Task: Turn the input into (subject, predicate, object).
Input: Sam Johnson is eating breakfast.
Output: (Dolores Murphy, eat, breakfast)
---
Input: Joon Park is brewing coffee.
Output: (Joon Park, brew, coffee)
---
Input: Jane Cook is sleeping.
Output: (Jane Cook, is, sleep)
---
Input: Michael Bernstein is writing email on a computer.
Output: (Michael Bernstein, write, email)
---
Input: Percy Liang is teaching students in a classroom.
Output: (Percy Liang, teach, students)
---
Input: Merrie Morris is running on a treadmill.
Output: (Merrie Morris, run, treadmill)
---"""
s += persona_name + "is" + action + ".\n"
s += "(" + persona_name + ","
s += sgl.gen(name="Triple", max_tokens=20, stop=")")
def generate_event_triple_prompt(persona_name, action):
s = ""
s += """Task: Turn the input into (subject, predicate, object).
Input: Sam Johnson is eating breakfast.
Output: (Dolores Murphy, eat, breakfast)
---
Input: Joon Park is brewing coffee.
Output: (Joon Park, brew, coffee)
---
Input: Jane Cook is sleeping.
Output: (Jane Cook, is, sleep)
---
Input: Michael Bernstein is writing email on a computer.
Output: (Michael Bernstein, write, email)
---
Input: Percy Liang is teaching students in a classroom.
Output: (Percy Liang, teach, students)
---
Input: Merrie Morris is running on a treadmill.
Output: (Merrie Morris, run, treadmill)
---"""
s += persona_name + "is" + action + ".\n"
s += "(" + persona_name + ","
return {"prompt": s, "max_tokens": 20, "stop": ")"}
@sgl.function
def generate_pronunciatio(s, action):
s += "Convert an action description to an emoji (important: use two or less emojis).\n"
s += "Action description: " + action + ".\n"
s += "Emoji:" + sgl.gen(name="Emoji", max_tokens=6)
def generate_pronunciatio_prompt(action):
s = ""
s += "Convert an action description to an emoji (important: use two or less emojis).\n"
s += "Action description: " + action + ".\n"
s += "Emoji:"
return {"prompt": s, "max_tokens": 6, "stop": None}
@sgl.function
def action_location_sector(
s,
persona_name,
living_sector,
living_sector_areas,
current_sector,
current_sector_areas,
daily_plan,
sector_options,
current_action,
next_action,
):
s += """Task -- choose an appropriate area from the area options for a task at hand.
Sam Kim lives in {Sam Kim's house} that has Sam Kim's room, bathroom, kitchen.
Sam Kim is currently in {Sam Kim's house} that has Sam Kim's room, bathroom, kitchen.
Area options: {Sam Kim's house, The Rose and Crown Pub, Hobbs Cafe, Oak Hill College, Johnson Park, Harvey Oak Supply Store, The Willows Market and Pharmacy}.
* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
* Must be one of the "Area options," verbatim.
For taking a walk, Sam Kim should go to the following area: {Johnson Park}
---
Jane Anderson lives in {Oak Hill College Student Dormatory} that has Jane Anderson's room.
Jane Anderson is currently in {Oak Hill College} that has a classroom, library
Area options: {Oak Hill College Student Dormatory, The Rose and Crown Pub, Hobbs Cafe, Oak Hill College, Johnson Park, Harvey Oak Supply Store, The Willows Market and Pharmacy}.
* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
* Must be one of the "Area options," verbatim.
For eating dinner, Jane Anderson should go to the following area: {Hobbs Cafe}
---"""
s += (persona_name + " lives in " + living_sector + " that has " +
living_sector_areas + ".\n")
s += (persona_name + " is currently in " + current_sector + " that has " +
current_sector_areas + ".\n")
s += daily_plan + ".\n"
s += "Area options: " + sector_options + ".\n"
s += """* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
* Must be one of the "Area options," verbatim.\n"""
s += (persona_name + " is " + current_action + ". For " + next_action +
", " + persona_name + " should go to the following area: {")
s += sgl.gen(name="Location", max_tokens=10, stop="}")
def action_location_sector_prompt(
persona_name,
living_sector,
living_sector_areas,
current_sector,
current_sector_areas,
daily_plan,
sector_options,
current_action,
next_action,
):
s = ""
s += """Task -- choose an appropriate area from the area options for a task at hand.
Sam Kim lives in {Sam Kim's house} that has Sam Kim's room, bathroom, kitchen.
Sam Kim is currently in {Sam Kim's house} that has Sam Kim's room, bathroom, kitchen.
Area options: {Sam Kim's house, The Rose and Crown Pub, Hobbs Cafe, Oak Hill College, Johnson Park, Harvey Oak Supply Store, The Willows Market and Pharmacy}.
* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
* Must be one of the "Area options," verbatim.
For taking a walk, Sam Kim should go to the following area: {Johnson Park}
---
Jane Anderson lives in {Oak Hill College Student Dormatory} that has Jane Anderson's room.
Jane Anderson is currently in {Oak Hill College} that has a classroom, library
Area options: {Oak Hill College Student Dormatory, The Rose and Crown Pub, Hobbs Cafe, Oak Hill College, Johnson Park, Harvey Oak Supply Store, The Willows Market and Pharmacy}.
* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
* Must be one of the "Area options," verbatim.
For eating dinner, Jane Anderson should go to the following area: {Hobbs Cafe}
---"""
s += (persona_name + " lives in " + living_sector + " that has " +
living_sector_areas + ".\n")
s += (persona_name + " is currently in " + current_sector + " that has " +
current_sector_areas + ".\n")
s += daily_plan + ".\n"
s += "Area options: " + sector_options + ".\n"
s += """* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
* Must be one of the "Area options," verbatim.\n"""
s += (persona_name + " is " + current_action + ". For " + next_action +
", " + persona_name + " should go to the following area: {")
return {"prompt": s, "max_tokens": 10, "stop": "}"}
@sgl.function
def action_location_object(s, persona_name, target_sector, target_sector_areas,
current_action, next_action):
s += """
Jane Anderson is in kitchen in Jane Anderson's house.
Jane Anderson is going to Jane Anderson's house that has the following areas: {kitchen, bedroom, bathroom}
Stay in the current area if the activity can be done there. Never go into other people's rooms unless necessary.
For cooking, Jane Anderson should go to the following area in Jane Anderson's house:
Answer: {kitchen}
---
Tom Watson is in common room in Tom Watson's apartment.
Tom Watson is going to Hobbs Cafe that has the following areas: {cafe}
Stay in the current area if the activity can be done there. Never go into other people's rooms unless necessary.
For getting coffee, Tom Watson should go to the following area in Hobbs Cafe:
Answer: {cafe}
---"""
s += (persona_name + " is going to " + target_sector +
" that has the following areas: {" + target_sector_areas + "}\n")
s += """* Stay in the current area if the activity can be done there.
* NEVER go into other people's rooms unless necessary."""
s += (persona_name + " is " + current_action + ". For " + next_action +
", " + persona_name + "should go to the following area in " +
target_sector)
s += " (MUST pick one of {" + target_sector_areas + "}):\n"
s += "Answer: {" + sgl.gen(name="Area", max_tokens=5, stop="}")
def action_location_object_prompt(persona_name, target_sector,
target_sector_areas, current_action,
next_action):
s = ""
s += """
Jane Anderson is in kitchen in Jane Anderson's house.
Jane Anderson is going to Jane Anderson's house that has the following areas: {kitchen, bedroom, bathroom}
Stay in the current area if the activity can be done there. Never go into other people's rooms unless necessary.
For cooking, Jane Anderson should go to the following area in Jane Anderson's house:
Answer: {kitchen}
---
Tom Watson is in common room in Tom Watson's apartment.
Tom Watson is going to Hobbs Cafe that has the following areas: {cafe}
Stay in the current area if the activity can be done there. Never go into other people's rooms unless necessary.
For getting coffee, Tom Watson should go to the following area in Hobbs Cafe:
Answer: {cafe}
---"""
s += (persona_name + " is going to " + target_sector +
" that has the following areas: {" + target_sector_areas + "}\n")
s += """* Stay in the current area if the activity can be done there.
* NEVER go into other people's rooms unless necessary."""
s += (persona_name + " is " + current_action + ". For " + next_action +
", " + persona_name + "should go to the following area in " +
target_sector)
s += " (MUST pick one of {" + target_sector_areas + "}):\n"
s += "Answer: {"
return {"prompt": s, "max_tokens": 5, "stop": "}"}

View File

@@ -0,0 +1,104 @@
import argparse
from functools import partial
import json
import time
from pathlib import Path
from tqdm import tqdm
from sglang.test.test_utils import (
add_common_other_args_and_parse,
call_generate_lightllm,
call_generate_vllm,
call_generate_srt_raw,
)
from sglang.utils import read_jsonl, dump_state_text
from agent_functions import (
poignancy_event_prompt,
generate_event_triple_prompt,
generate_pronunciatio_prompt,
action_location_sector_prompt,
action_location_object_prompt,
)
def main(args):
lines = read_jsonl(args.data_path)[:args.num_events]
mapping = {
"poignancy_event": poignancy_event_prompt,
"generate_event_triple": generate_event_triple_prompt,
"generate_pronunciatio": generate_pronunciatio_prompt,
"action_location_sector": action_location_sector_prompt,
"action_location_object": action_location_object_prompt,
}
arguments = [mapping[k](**v) for l in lines for k, v in l.items()]
states = []
# Select backend
if args.backend == "lightllm":
url = f"{args.host}:{args.port}/generate"
call_generate = partial(call_generate_lightllm, url=url)
elif args.backend == "vllm":
url = f"{args.host}:{args.port}/generate"
call_generate = partial(call_generate_vllm, url=url)
elif args.backend == "srt-raw":
url = f"{args.host}:{args.port}/generate"
call_generate = partial(call_generate_srt_raw, url=url)
elif args.backend == "guidance":
from guidance import models, gen
model = models.LlamaCpp(
str(Path.home()) + "/model_weights/Llama-2-7b-chat.gguf",
n_gpu_layers=-1,
n_ctx=4096,
)
def call_generate(prompt, temperature, max_tokens, stop):
out = model + prompt + gen(
name="result",
max_tokens=max_tokens,
temperature=temperature,
stop=stop,
)
return out["result"]
else:
raise ValueError(f"Invalid backend: {args.backend}")
def get_one_answer(arg):
answer = call_generate(**arg, temperature=0)
states.append(answer)
tic = time.time()
# we always sequentially execute agent calls to maintain its dependency
for arg in tqdm(arguments):
get_one_answer(arg)
latency = time.time() - tic
print(f"Latency: {latency:.3f}")
# Write results
dump_state_text(f"tmp_output_{args.backend}.txt", states)
with open(args.result_file, "a") as fout:
value = {
"task": "Generative Agents",
"backend": args.backend,
"num_gpus": 1,
"latency": round(latency, 3),
# to pack weighted functions as a single agent
"num_requests": len(arguments) / len(mapping),
"other": {
"parallel": args.parallel,
},
}
fout.write(json.dumps(value) + "\n")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--data-path", type=str, default="agent_calls.jsonl")
parser.add_argument("--num-events", type=int, default=10)
args = add_common_other_args_and_parse(parser)
main(args)

View File

@@ -0,0 +1,74 @@
import argparse
import json
import time
import sglang as sgl
from sglang.test.test_utils import (
add_common_sglang_args_and_parse,
select_sglang_backend,
)
from sglang.utils import read_jsonl, dump_state_text
from agent_functions import (
poignancy_event,
generate_event_triple,
generate_pronunciatio,
action_location_sector,
action_location_object,
)
def main(args):
lines = read_jsonl(args.data_path)[:args.num_events]
mapping = {
"poignancy_event": poignancy_event,
"generate_event_triple": generate_event_triple,
"generate_pronunciatio": generate_pronunciatio,
"action_location_sector": action_location_sector,
"action_location_object": action_location_object,
}
arguments = [{mapping[k]: v for k, v in l.items()} for l in lines]
# Select backend
backend = select_sglang_backend(args)
sgl.set_default_backend(backend)
states = []
# Run requests
tic = time.time()
for a in arguments:
# only a single key in the dict
for func, arg in a.items():
result = func.run(**arg)
result.sync()
states.append(result)
latency = time.time() - tic
# Compute accuracy
print(f"Latency: {latency:.3f}")
# Write results
dump_state_text(f"tmp_output_{args.backend}.txt", states)
with open(args.result_file, "a") as fout:
value = {
"task": "Generative Agents",
"backend": args.backend,
"num_gpus": 1,
"latency": round(latency, 3),
# to pack weighted functions as a single agent
"num_requests": len(arguments) / len(mapping),
"other": {
"num_events": args.num_events,
"parallel": args.parallel,
},
}
fout.write(json.dumps(value) + "\n")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--data-path", type=str, default="agent_calls.jsonl")
parser.add_argument("--num-events", type=int, default=10)
args = add_common_sglang_args_and_parse(parser)
main(args)