release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>
2024-01-08 04:37:50 +00:00
parent f6d40df0ee
commit 22085081bb
145 changed files with 17802 additions and 2 deletions
--- a/benchmark/generative_agents/README.md
+++ b/benchmark/generative_agents/README.md
@@ -0,0 +1,26 @@
+## Run benchmark
+
+Ensure that this benchmark is run in a serial manner (using --parallel 1) to preserve any potential dependencies between requests.
+
+### Benchmark sglang
+```
+python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
+```
+
+```
+python3 bench_sglang.py --num-events 1000 --parallel 1
+```
+
+### Benchmark vllm
+```
+python3 -m vllm.entrypoints.api_server --tokenizer-mode auto --model meta-llama/Llama-2-7b-chat-hf --disable-log-requests --port 21000
+```
+
+```
+python3 bench_other.py --num-events 1000 --backend vllm --parallel 1
+```
+
+### Benchmark guidance
+```
+python3 bench_other.py --num-events 1000 --backend guidance --parallel 1
+```
--- a/benchmark/generative_agents/agent_functions.py
+++ b/benchmark/generative_agents/agent_functions.py
@@ -0,0 +1,231 @@
+import sglang as sgl
+
+# These are the top five agent functions, which together account for ~70% of the LLM calls.
+# reference: https://github.com/joonspk-research/generative_agents/
+
+
+@sgl.function
+def poignancy_event(s, persona_name, persona_iss, event):
+    """Append the poignancy-rating prompt to ``s`` and generate the rating.
+
+    The prompt consists of a short persona description (``persona_name``,
+    ``persona_iss``), the 1-to-10 rating instructions and the ``event`` text.
+    The answer is generated with ``max_tokens=2`` and stored under the name
+    ``"Rate"``.
+    """
+    s += "Here is a brief description of " + persona_name + ".\n"
+    s += persona_iss + "\n"
+    # The trailing space after "for" keeps the persona name from being glued
+    # to the preceding word ("forJane").
+    s += "On the scale of 1 to 10, where 1 is purely mundane (e.g., brushing teeth, making bed) and 10 is extremely poignant (e.g., a break up, college acceptance), rate the likely poignancy of the following event for "
+    s += persona_name + ".\n\n"
+    # The newline separates the event text from the "Rate" cue that follows.
+    s += "Event: " + event + "\n"
+    s += "Rate (return a number between 1 to 10):"
+    s += sgl.gen(name="Rate", max_tokens=2)
+
+
+def poignancy_event_prompt(persona_name, persona_iss, event):
+    """Build the poignancy-rating request for non-sglang backends.
+
+    Returns a dict with the keys ``"prompt"`` (the full prompt text),
+    ``"max_tokens"`` (2) and ``"stop"`` (``None``).
+    """
+    s = ""
+    s += "Here is a brief description of " + persona_name + ".\n"
+    s += persona_iss + "\n"
+    # The trailing space after "for" keeps the persona name from being glued
+    # to the preceding word ("forJane").
+    s += "On the scale of 1 to 10, where 1 is purely mundane (e.g., brushing teeth, making bed) and 10 is extremely poignant (e.g., a break up, college acceptance), rate the likely poignancy of the following event for "
+    s += persona_name + ".\n\n"
+    # The newline separates the event text from the "Rate" cue that follows.
+    s += "Event: " + event + "\n"
+    s += "Rate (return a number between 1 to 10):"
+    return {"prompt": s, "max_tokens": 2, "stop": None}
+
+
+@sgl.function
+def generate_event_triple(s, persona_name, action):
+    """Append the (subject, predicate, object) few-shot prompt and generate.
+
+    After the fixed examples, the query is written in the same
+    ``Input: ... / Output: (...`` layout as the examples, with the subject
+    pre-filled as ``persona_name``. The rest of the triple is generated with
+    ``max_tokens=20``, stopping at ``")"``, and stored under ``"Triple"``.
+    """
+    # Few-shot examples are kept byte-for-byte (including trailing spaces).
+    # NOTE(review): the first example pairs "Sam Johnson" with
+    # "Dolores Murphy" -- left untouched; confirm whether that is intended.
+    # The literal now ends with a newline so the query starts on its own line.
+    s += """Task: Turn the input into (subject, predicate, object).
+Input: Sam Johnson is eating breakfast. 
+Output: (Dolores Murphy, eat, breakfast) 
+--- 
+Input: Joon Park is brewing coffee.
+Output: (Joon Park, brew, coffee)
+---
+Input: Jane Cook is sleeping. 
+Output: (Jane Cook, is, sleep)
+---
+Input: Michael Bernstein is writing email on a computer. 
+Output: (Michael Bernstein, write, email)
+---
+Input: Percy Liang is teaching students in a classroom. 
+Output: (Percy Liang, teach, students)
+---
+Input: Merrie Morris is running on a treadmill. 
+Output: (Merrie Morris, run, treadmill)
+---
+"""
+    # Spaces around "is" and the Input/Output labels make the query match
+    # the format of the examples above.
+    s += "Input: " + persona_name + " is " + action + ".\n"
+    s += "Output: (" + persona_name + ","
+    s += sgl.gen(name="Triple", max_tokens=20, stop=")")
+
+
+def generate_event_triple_prompt(persona_name, action):
+    """Build the (subject, predicate, object) request for non-sglang backends.
+
+    After the fixed examples, the query is written in the same
+    ``Input: ... / Output: (...`` layout as the examples, with the subject
+    pre-filled as ``persona_name``.
+
+    Returns a dict with the keys ``"prompt"``, ``"max_tokens"`` (20) and
+    ``"stop"`` (``")"``).
+    """
+    s = ""
+    # Few-shot examples are kept byte-for-byte (including trailing spaces).
+    # NOTE(review): the first example pairs "Sam Johnson" with
+    # "Dolores Murphy" -- left untouched; confirm whether that is intended.
+    # The literal now ends with a newline so the query starts on its own line.
+    s += """Task: Turn the input into (subject, predicate, object).
+Input: Sam Johnson is eating breakfast. 
+Output: (Dolores Murphy, eat, breakfast) 
+--- 
+Input: Joon Park is brewing coffee.
+Output: (Joon Park, brew, coffee)
+---
+Input: Jane Cook is sleeping. 
+Output: (Jane Cook, is, sleep)
+---
+Input: Michael Bernstein is writing email on a computer. 
+Output: (Michael Bernstein, write, email)
+---
+Input: Percy Liang is teaching students in a classroom. 
+Output: (Percy Liang, teach, students)
+---
+Input: Merrie Morris is running on a treadmill. 
+Output: (Merrie Morris, run, treadmill)
+---
+"""
+    # Spaces around "is" and the Input/Output labels make the query match
+    # the format of the examples above.
+    s += "Input: " + persona_name + " is " + action + ".\n"
+    s += "Output: (" + persona_name + ","
+    return {"prompt": s, "max_tokens": 20, "stop": ")"}
+
+
+@sgl.function
+def generate_pronunciatio(s, action):
+    """Append the action-to-emoji prompt to ``s`` and generate the emoji.
+
+    The answer is generated with ``max_tokens=6`` and stored under the name
+    ``"Emoji"``.
+    """
+    instruction = "Convert an action description to an emoji (important: use two or less emojis).\n"
+    s += instruction
+    s += "Action description: " + action + ".\n"
+    # Cue first, then the generation call as its own step.
+    s += "Emoji:"
+    s += sgl.gen(name="Emoji", max_tokens=6)
+
+
+def generate_pronunciatio_prompt(action):
+    """Build the action-to-emoji request for non-sglang backends.
+
+    Returns a dict with the keys ``"prompt"``, ``"max_tokens"`` (6) and
+    ``"stop"`` (``None``).
+    """
+    pieces = [
+        "Convert an action description to an emoji (important: use two or less emojis).\n",
+        "Action description: " + action + ".\n",
+        "Emoji:",
+    ]
+    return {"prompt": "".join(pieces), "max_tokens": 6, "stop": None}
+
+
+@sgl.function
+def action_location_sector(
+    s,
+    persona_name,
+    living_sector,
+    living_sector_areas,
+    current_sector,
+    current_sector_areas,
+    daily_plan,
+    sector_options,
+    current_action,
+    next_action,
+):
+    """Append the "which area should the persona go to" prompt and generate.
+
+    Two fixed examples are followed by the persona's own situation (where it
+    lives, where it currently is, its daily plan and the area options). The
+    prompt ends with an open ``{``; the answer is generated with
+    ``max_tokens=10``, stopping at ``"}"``, and stored under ``"Location"``.
+    """
+    # Few-shot examples are kept byte-for-byte (including trailing spaces).
+    # The literal now ends with a newline so the persona's own description
+    # starts on a fresh line instead of being glued to the "---" separator.
+    s += """Task -- choose an appropriate area  from the area options for a task at hand. 
+Sam Kim lives in {Sam Kim's house} that has Sam Kim's room, bathroom, kitchen.
+Sam Kim is currently in {Sam Kim's house} that has Sam Kim's room, bathroom, kitchen. 
+Area options: {Sam Kim's house, The Rose and Crown Pub, Hobbs Cafe, Oak Hill College, Johnson Park, Harvey Oak Supply Store, The Willows Market and Pharmacy}.
+* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
+* Must be one of the "Area options," verbatim.
+For taking a walk, Sam Kim should go to the following area: {Johnson Park}
+---
+Jane Anderson lives in {Oak Hill College Student Dormatory} that has Jane Anderson's room.
+Jane Anderson is currently in {Oak Hill College} that has a classroom, library
+Area options: {Oak Hill College Student Dormatory, The Rose and Crown Pub, Hobbs Cafe, Oak Hill College, Johnson Park, Harvey Oak Supply Store, The Willows Market and Pharmacy}. 
+* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
+* Must be one of the "Area options," verbatim.
+For eating dinner, Jane Anderson should go to the following area: {Hobbs Cafe}
+---
+"""
+    s += (persona_name + " lives in " + living_sector + " that has " +
+          living_sector_areas + ".\n")
+    s += (persona_name + " is currently in " + current_sector + " that has " +
+          current_sector_areas + ".\n")
+    s += daily_plan + ".\n"
+    s += "Area options: " + sector_options + ".\n"
+    s += """* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
+* Must be one of the "Area options," verbatim.\n"""
+    s += (persona_name + " is " + current_action + ". For " + next_action +
+          ", " + persona_name + " should go to the following area: {")
+    s += sgl.gen(name="Location", max_tokens=10, stop="}")
+
+
+def action_location_sector_prompt(
+    persona_name,
+    living_sector,
+    living_sector_areas,
+    current_sector,
+    current_sector_areas,
+    daily_plan,
+    sector_options,
+    current_action,
+    next_action,
+):
+    """Build the "which area should the persona go to" request.
+
+    Two fixed examples are followed by the persona's own situation (where it
+    lives, where it currently is, its daily plan and the area options). The
+    prompt ends with an open ``{``.
+
+    Returns a dict with the keys ``"prompt"``, ``"max_tokens"`` (10) and
+    ``"stop"`` (``"}"``).
+    """
+    s = ""
+    # Few-shot examples are kept byte-for-byte (including trailing spaces).
+    # The literal now ends with a newline so the persona's own description
+    # starts on a fresh line instead of being glued to the "---" separator.
+    s += """Task -- choose an appropriate area  from the area options for a task at hand. 
+Sam Kim lives in {Sam Kim's house} that has Sam Kim's room, bathroom, kitchen.
+Sam Kim is currently in {Sam Kim's house} that has Sam Kim's room, bathroom, kitchen. 
+Area options: {Sam Kim's house, The Rose and Crown Pub, Hobbs Cafe, Oak Hill College, Johnson Park, Harvey Oak Supply Store, The Willows Market and Pharmacy}.
+* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
+* Must be one of the "Area options," verbatim.
+For taking a walk, Sam Kim should go to the following area: {Johnson Park}
+---
+Jane Anderson lives in {Oak Hill College Student Dormatory} that has Jane Anderson's room.
+Jane Anderson is currently in {Oak Hill College} that has a classroom, library
+Area options: {Oak Hill College Student Dormatory, The Rose and Crown Pub, Hobbs Cafe, Oak Hill College, Johnson Park, Harvey Oak Supply Store, The Willows Market and Pharmacy}. 
+* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
+* Must be one of the "Area options," verbatim.
+For eating dinner, Jane Anderson should go to the following area: {Hobbs Cafe}
+---
+"""
+    s += (persona_name + " lives in " + living_sector + " that has " +
+          living_sector_areas + ".\n")
+    s += (persona_name + " is currently in " + current_sector + " that has " +
+          current_sector_areas + ".\n")
+    s += daily_plan + ".\n"
+    s += "Area options: " + sector_options + ".\n"
+    s += """* Stay in the current area if the activity can be done there. Only go out if the activity needs to take place in another place.
+* Must be one of the "Area options," verbatim.\n"""
+    s += (persona_name + " is " + current_action + ". For " + next_action +
+          ", " + persona_name + " should go to the following area: {")
+    return {"prompt": s, "max_tokens": 10, "stop": "}"}
+
+
+@sgl.function
+def action_location_object(s, persona_name, target_sector, target_sector_areas,
+                           current_action, next_action):
+    """Append the "which area inside the target sector" prompt and generate.
+
+    Two fixed examples are followed by the persona's own target sector, its
+    areas and the current/next action. The prompt ends with ``Answer: {``;
+    the answer is generated with ``max_tokens=5``, stopping at ``"}"``, and
+    stored under ``"Area"``.
+    """
+    # Few-shot examples are kept byte-for-byte (including the leading newline
+    # and trailing spaces). The literal now ends with a newline so the query
+    # is not glued to the "---" separator.
+    s += """
+Jane Anderson is in kitchen in Jane Anderson's house.
+Jane Anderson is going to Jane Anderson's house that has the following areas: {kitchen,  bedroom, bathroom}
+Stay in the current area if the activity can be done there. Never go into other people's rooms unless necessary.
+For cooking, Jane Anderson should go to the following area in Jane Anderson's house:
+Answer: {kitchen}
+---
+Tom Watson is in common room in Tom Watson's apartment. 
+Tom Watson is going to Hobbs Cafe that has the following areas: {cafe}
+Stay in the current area if the activity can be done there. Never go into other people's rooms unless necessary.
+For getting coffee, Tom Watson should go to the following area in Hobbs Cafe:
+Answer: {cafe}
+---
+"""
+    s += (persona_name + " is going to " + target_sector +
+          " that has the following areas: {" + target_sector_areas + "}\n")
+    # The trailing newline keeps the persona sentence off the rule's line.
+    s += """* Stay in the current area if the activity can be done there. 
+* NEVER go into other people's rooms unless necessary.
+"""
+    # The leading space in " should" keeps it from fusing with the name.
+    s += (persona_name + " is " + current_action + ". For " + next_action +
+          ", " + persona_name + " should go to the following area in " +
+          target_sector)
+    s += " (MUST pick one of {" + target_sector_areas + "}):\n"
+    s += "Answer: {" + sgl.gen(name="Area", max_tokens=5, stop="}")
+
+
+def action_location_object_prompt(persona_name, target_sector,
+                                  target_sector_areas, current_action,
+                                  next_action):
+    """Build the "which area inside the target sector" request.
+
+    Two fixed examples are followed by the persona's own target sector, its
+    areas and the current/next action. The prompt ends with ``Answer: {``.
+
+    Returns a dict with the keys ``"prompt"``, ``"max_tokens"`` (5) and
+    ``"stop"`` (``"}"``).
+    """
+    s = ""
+    # Few-shot examples are kept byte-for-byte (including the leading newline
+    # and trailing spaces). The literal now ends with a newline so the query
+    # is not glued to the "---" separator.
+    s += """
+Jane Anderson is in kitchen in Jane Anderson's house.
+Jane Anderson is going to Jane Anderson's house that has the following areas: {kitchen,  bedroom, bathroom}
+Stay in the current area if the activity can be done there. Never go into other people's rooms unless necessary.
+For cooking, Jane Anderson should go to the following area in Jane Anderson's house:
+Answer: {kitchen}
+---
+Tom Watson is in common room in Tom Watson's apartment. 
+Tom Watson is going to Hobbs Cafe that has the following areas: {cafe}
+Stay in the current area if the activity can be done there. Never go into other people's rooms unless necessary.
+For getting coffee, Tom Watson should go to the following area in Hobbs Cafe:
+Answer: {cafe}
+---
+"""
+    s += (persona_name + " is going to " + target_sector +
+          " that has the following areas: {" + target_sector_areas + "}\n")
+    # The trailing newline keeps the persona sentence off the rule's line.
+    s += """* Stay in the current area if the activity can be done there. 
+* NEVER go into other people's rooms unless necessary.
+"""
+    # The leading space in " should" keeps it from fusing with the name.
+    s += (persona_name + " is " + current_action + ". For " + next_action +
+          ", " + persona_name + " should go to the following area in " +
+          target_sector)
+    s += " (MUST pick one of {" + target_sector_areas + "}):\n"
+    s += "Answer: {"
+    return {"prompt": s, "max_tokens": 5, "stop": "}"}
--- a/benchmark/generative_agents/bench_other.py
+++ b/benchmark/generative_agents/bench_other.py
@@ -0,0 +1,104 @@
+import argparse
+from functools import partial
+import json
+import time
+from pathlib import Path
+
+from tqdm import tqdm
+from sglang.test.test_utils import (
+    add_common_other_args_and_parse,
+    call_generate_lightllm,
+    call_generate_vllm,
+    call_generate_srt_raw,
+)
+from sglang.utils import read_jsonl, dump_state_text
+
+from agent_functions import (
+    poignancy_event_prompt,
+    generate_event_triple_prompt,
+    generate_pronunciatio_prompt,
+    action_location_sector_prompt,
+    action_location_object_prompt,
+)
+
+
+def main(args):
+    """Replay recorded agent calls one at a time against a baseline backend.
+
+    Reads the first ``args.num_events`` records of ``args.data_path``, turns
+    every ``{function_name: kwargs}`` entry into a request via the matching
+    ``*_prompt`` builder, sends the requests sequentially with
+    ``temperature=0``, prints the total latency, dumps the answers and
+    appends one JSON line to ``args.result_file``.
+
+    ``args.parallel`` is only recorded in the result line; the requests are
+    always issued one after another.
+
+    Raises:
+        ValueError: if ``args.backend`` is not one of the supported backends.
+    """
+    lines = read_jsonl(args.data_path)[:args.num_events]
+    mapping = {
+        "poignancy_event": poignancy_event_prompt,
+        "generate_event_triple": generate_event_triple_prompt,
+        "generate_pronunciatio": generate_pronunciatio_prompt,
+        "action_location_sector": action_location_sector_prompt,
+        "action_location_object": action_location_object_prompt,
+    }
+
+    # One request dict per recorded call, in file order.
+    arguments = []
+    for record in lines:
+        for func_name, kwargs in record.items():
+            arguments.append(mapping[func_name](**kwargs))
+    states = []
+
+    # Select backend: the HTTP backends differ only in the client function.
+    http_clients = {
+        "lightllm": call_generate_lightllm,
+        "vllm": call_generate_vllm,
+        "srt-raw": call_generate_srt_raw,
+    }
+    if args.backend in http_clients:
+        url = f"{args.host}:{args.port}/generate"
+        call_generate = partial(http_clients[args.backend], url=url)
+    elif args.backend == "guidance":
+        from guidance import models, gen
+
+        model = models.LlamaCpp(
+            str(Path.home()) + "/model_weights/Llama-2-7b-chat.gguf",
+            n_gpu_layers=-1,
+            n_ctx=4096,
+        )
+
+        def call_generate(prompt, temperature, max_tokens, stop):
+            completion = gen(
+                name="result",
+                max_tokens=max_tokens,
+                temperature=temperature,
+                stop=stop,
+            )
+            out = model + prompt + completion
+            return out["result"]
+
+    else:
+        raise ValueError(f"Invalid backend: {args.backend}")
+
+    tic = time.time()
+    # we always sequentially execute agent calls to maintain its dependency
+    for request in tqdm(arguments):
+        states.append(call_generate(**request, temperature=0))
+    latency = time.time() - tic
+
+    print(f"Latency: {latency:.3f}")
+
+    # Write results
+    dump_state_text(f"tmp_output_{args.backend}.txt", states)
+
+    value = {
+        "task": "Generative Agents",
+        "backend": args.backend,
+        "num_gpus": 1,
+        "latency": round(latency, 3),
+        # to pack weighted functions as a single agent
+        "num_requests": len(arguments) / len(mapping),
+        "other": {
+            "parallel": args.parallel,
+        },
+    }
+    with open(args.result_file, "a") as fout:
+        fout.write(json.dumps(value) + "\n")
+
+
+if __name__ == "__main__":
+    # Benchmark-specific options; the shared helper adds the common ones
+    # and parses the command line.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--data-path", type=str, default="agent_calls.jsonl")
+    cli.add_argument("--num-events", type=int, default=10)
+    main(add_common_other_args_and_parse(cli))
--- a/benchmark/generative_agents/bench_sglang.py
+++ b/benchmark/generative_agents/bench_sglang.py
@@ -0,0 +1,74 @@
+import argparse
+import json
+import time
+
+import sglang as sgl
+from sglang.test.test_utils import (
+    add_common_sglang_args_and_parse,
+    select_sglang_backend,
+)
+from sglang.utils import read_jsonl, dump_state_text
+
+from agent_functions import (
+    poignancy_event,
+    generate_event_triple,
+    generate_pronunciatio,
+    action_location_sector,
+    action_location_object,
+)
+
+
+def main(args):
+    """Replay recorded agent calls one at a time through sglang programs.
+
+    Reads the first ``args.num_events`` records of ``args.data_path``, maps
+    every ``{function_name: kwargs}`` entry to the matching sglang program,
+    runs the calls sequentially (waiting for each one via ``sync()`` before
+    starting the next), prints the total latency, dumps the resulting states
+    and appends one JSON line to ``args.result_file``.
+
+    ``args.parallel`` is only recorded in the result line; the calls are
+    always issued one after another.
+    """
+    lines = read_jsonl(args.data_path)[:args.num_events]
+    mapping = {
+        "poignancy_event": poignancy_event,
+        "generate_event_triple": generate_event_triple,
+        "generate_pronunciatio": generate_pronunciatio,
+        "action_location_sector": action_location_sector,
+        "action_location_object": action_location_object,
+    }
+    # Flatten the records into (program, kwargs) pairs in file order. Every
+    # call is executed and recorded even if a record holds more than one
+    # entry, and an empty record simply contributes nothing.
+    calls = [(mapping[k], v) for l in lines for k, v in l.items()]
+
+    # Select backend
+    backend = select_sglang_backend(args)
+    sgl.set_default_backend(backend)
+
+    states = []
+    # Run requests
+    tic = time.time()
+    for func, kwargs in calls:
+        state = func.run(**kwargs)
+        # Wait for this call to finish before issuing the next one.
+        state.sync()
+        states.append(state)
+    latency = time.time() - tic
+
+    print(f"Latency: {latency:.3f}")
+
+    # Write results
+    dump_state_text(f"tmp_output_{args.backend}.txt", states)
+
+    with open(args.result_file, "a") as fout:
+        value = {
+            "task": "Generative Agents",
+            "backend": args.backend,
+            "num_gpus": 1,
+            "latency": round(latency, 3),
+            # to pack weighted functions as a single agent
+            "num_requests": len(calls) / len(mapping),
+            "other": {
+                "num_events": args.num_events,
+                "parallel": args.parallel,
+            },
+        }
+        fout.write(json.dumps(value) + "\n")
+
+
+if __name__ == "__main__":
+    # Benchmark-specific options; the shared helper adds the common ones
+    # and parses the command line.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--data-path", type=str, default="agent_calls.jsonl")
+    cli.add_argument("--num-events", type=int, default=10)
+    main(add_common_sglang_args_and_parse(cli))