jump-forward rename (#144)

This commit is contained in:
Liangsheng Yin
2024-02-05 16:50:37 +08:00
committed by GitHub
parent 82fa69b3cc
commit 26f0bedc8f
12 changed files with 70 additions and 70 deletions

View File

@@ -0,0 +1,74 @@
## Run benchmark
### Dependencies
```
llama_cpp_python 0.2.38
guidance 0.1.10
vllm 0.2.7
outlines 0.0.25
```
### Build dataset
The city information retrieval benchmark (`--mode city`) reads long Wikipedia documents from `questions.jsonl`. Before running it, build that dataset with the following commands:
```bash
pip install wikipedia
python3 build_dataset.py
```
### Benchmark sglang
Launch the sglang server with Llama-2-7B-Chat
```bash
python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
```
Benchmark Character Generation
```bash
python3 bench_sglang.py --mode character
```
Benchmark City Information Retrieval
```bash
python3 bench_sglang.py --mode city
```
### Benchmark vllm
Launch the vLLM server (through `outlines.serve`) with Llama-2-7B-Chat
```bash
python3 -m outlines.serve.serve --tokenizer-mode auto --model meta-llama/Llama-2-7b-chat-hf --disable-log-requests --port 21000
```
Benchmark Character Generation
```bash
python3 bench_other.py --mode character --backend vllm
```
Benchmark City Information Retrieval
```bash
python3 bench_other.py --mode city --backend vllm
```
### Benchmark guidance
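Run Llama-7B and benchmark character generation (guidance loads the model in-process through llama.cpp; use `--llama-cpp-model-path` to point at your GGUF file)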
```bash
python3 bench_other.py --mode character --backend guidance --parallel 1
```
Run Llama-7B and benchmark city information retrieval
```bash
python3 bench_other.py --mode city --backend guidance --parallel 1
```

View File

@@ -0,0 +1,245 @@
import argparse
import json
import time
from concurrent.futures import ThreadPoolExecutor
from functools import partial
import guidance
from sglang.test.test_utils import (
add_common_other_args_and_parse,
call_generate_outlines,
)
from sglang.utils import dump_state_text, read_jsonl
from tqdm import tqdm
# The JSON regex derived from the pydantic model (build_regex_from_object)
# hit FSM bugs, so the pattern is spelled out by hand, one JSON line per
# entry, and glued together with the regex newline escape.
character_regex = r"\n".join(
    (
        r'\{',
        r' "name": "[\w\d\s]{1,16}",',
        r' "house": "(Gryffindor|Slytherin|Ravenclaw|Hufflepuff)",',
        r' "blood status": "(Pure-blood|Half-blood|Muggle-born)",',
        r' "occupation": "(student|teacher|auror|ministry of magic|death eater|order of the phoenix)",',
        r' "wand": \{',
        r' "wood": "[\w\d\s]{1,16}",',
        r' "core": "[\w\d\s]{1,16}",',
        r' "length": [0-9]{1,2}\.[0-9]{0,2}',
        r' \},',
        r' "alive": "(Alive|Deceased)",',
        r' "patronus": "[\w\d\s]{1,16}",',
        r' "bogart": "[\w\d\s]{1,16}"',
        r'\}',
    )
)
# Hand-written pattern for the city JSON object: one entry per JSON line,
# joined with the regex newline escape.
city_regex = r"\n".join(
    (
        r'\{',
        r' "name": "[\w\d\s]{1,16}",',
        r' "country": "[\w\d\s]{1,16}",',
        r' "latitude": [-+]?[0-9]*\.?[0-9]{0,2},',
        r' "population": [-+]?[0-9]{1,9},',
        r' "top 3 landmarks": \["[\w\d\s]{1,16}", "[\w\d\s]{1,16}", "[\w\d\s]{1,16}"\]',
        r'\}',
    )
)
# fmt: off
def character_gen(name, generate):
    """Return the character prompt for ``name`` followed by its completion.

    ``generate`` is invoked as
    ``generate(prompt, max_tokens=256, regex=character_regex)`` and whatever
    it returns is appended to the prompt.
    """
    instruction = " is a character in Harry Potter. Please fill in the following information about this character.\n"
    prompt = name + instruction
    completion = generate(prompt, max_tokens=256, regex=character_regex)
    return prompt + completion
# fmt: on
# fmt: off
def city_gen(document, generate):
    """Return the city-extraction prompt for ``document`` plus its completion.

    ``generate`` is invoked as
    ``generate(prompt, max_tokens=256, regex=city_regex)`` and whatever it
    returns is appended to the prompt.
    """
    header = "Please extract the information of a city from the following wikipedia page.\n"
    page = "Page begin.\n" + document + "Page end.\n"
    request = "Here is the name, country, and symbol of the city in JSON format.\n"
    prompt = header + page + request
    return prompt + generate(prompt, max_tokens=256, regex=city_regex)
# fmt: on
@guidance
def character_maker(lm, name):
    """Append the Harry Potter character prompt and a JSON-shaped answer to ``lm``.

    The template interleaves fixed JSON text with constrained generations:
    free-text fields use ``guidance.gen`` with a regex, enumerated fields use
    ``guidance.select`` over a fixed option list. Returns the extended ``lm``.
    """
    # Free-text string values: word characters, digits and whitespace only.
    regex_str_no_quote = r"[\w\d\s]+"
    # Decimal number with digits on both sides of the point (used for wand length).
    regex_float = r"[0-9]+\.[0-9]+"
    # The template text is whitespace-sensitive; it is part of the prompt sent to the model.
    lm += f"""\
{name} is a character in Harry Potter. Please fill in the following information about him/her.
{{
"name": "{guidance.gen("name", max_tokens=16, regex=regex_str_no_quote)}",
"house": "{guidance.select(options=['Gryffindor', 'Slytherin', 'Ravenclaw', 'Hufflepuff'], name='house')}",
"blood status": "{guidance.select(options=['Pure-blood', 'Half-blood', 'Muggle-born'], name='blood status')}",
"occupation": "{guidance.select(options=['student', 'teacher', 'auror', 'ministry of magic', 'death eater', 'order of the phoenix'], name='occupation')}",
"wand": {{
"wood": "{guidance.gen("wood", max_tokens=16, regex=regex_str_no_quote)}",
"core": "{guidance.gen('core', max_tokens=16, regex=regex_str_no_quote)}",
"length": {guidance.gen('length', max_tokens=10, regex=regex_float)}
}},
"alive": "{guidance.select(options=['Alive', 'Deceased'], name='alive')}",
"patronus": "{guidance.gen('patronus', max_tokens=16, regex=regex_str_no_quote)}",
"bogart": "{guidance.gen('bogart', max_tokens=16, regex=regex_str_no_quote)}"
}}
"""
    return lm
@guidance
def city_maker(lm, document):
    """Append the city-extraction prompt for ``document`` and a JSON-shaped answer to ``lm``.

    The template embeds the document between "Page begin." / "Page end."
    markers and then interleaves fixed JSON text with regex-constrained
    ``guidance.gen`` calls. Returns the extended ``lm``.
    """
    # Free-text string values: word characters, digits and whitespace only.
    regex_str_no_quote = r"[\w\d\s]+"
    # Decimal number with digits on both sides of the point (used for latitude).
    regex_float = r"[0-9]+\.[0-9]+"
    # The template text is whitespace-sensitive; it is part of the prompt sent to the model.
    lm += f"""\
Please extract the information of a city from the following wikipedia page.
Page begin.
{document}
Page end.
Here is the name, country, and symbol of the city in JSON format.
{{
"name": "{guidance.gen("name", max_tokens=16, regex=regex_str_no_quote)}",
"country": "{guidance.gen("country", max_tokens=16, regex=regex_str_no_quote)}",
"latitude": {guidance.gen("latitude", max_tokens=10, regex=regex_float)},
"population": {guidance.gen("population", max_tokens=10, regex=r"[0-9]+")},
"top 3 landmarks": [
"{guidance.gen("landmark1", max_tokens=16, regex=regex_str_no_quote)}", "{guidance.gen("landmark2", max_tokens=16, regex=regex_str_no_quote)}", "{guidance.gen("landmark3", max_tokens=16, regex=regex_str_no_quote)}"
]
}}
"""
    return lm
def bench_character(args):
    """Time character-JSON generation on the backend named by ``args.backend``.

    One request is built per line of ``args.data_path`` (at most
    ``args.num_jsons`` of them). Requests run sequentially with a progress
    bar when ``args.parallel == 1`` and on a thread pool otherwise.

    Returns:
        ``(states, latency)``: the per-request results (generated text for
        "vllm", the resulting ``lm`` object for "guidance") and the elapsed
        wall-clock seconds.

    Raises:
        ValueError: If ``args.backend`` is neither "vllm" nor "guidance".
    """
    with open(args.data_path, "r") as f:
        arguments = [{"name": line.strip()} for line in f]
    arguments = arguments[: args.num_jsons]
    states = [None] * len(arguments)

    # Pick the per-request worker for the chosen backend.
    if args.backend == "vllm":
        url = f"{args.host}:{args.port}/generate"
        generate = partial(call_generate_outlines, url=url, temperature=0)

        def get_one_answer(i):
            states[i] = character_gen(**arguments[i], generate=generate)

    elif args.backend == "guidance":
        model = guidance.models.LlamaCpp(
            args.llama_cpp_model_path,
            n_gpu_layers=-1,
            n_ctx=4096,
        )

        def get_one_answer(i):
            states[i] = model + character_maker(**arguments[i])

    else:
        raise ValueError(f"Invalid backend: {args.backend}")

    indices = range(len(arguments))
    tic = time.time()
    if args.parallel == 1:
        for i in tqdm(indices):
            get_one_answer(i)
    else:
        with ThreadPoolExecutor(args.parallel) as executor:
            # Drain the result iterator so that worker exceptions surface here.
            list(executor.map(get_one_answer, indices))
    latency = time.time() - tic

    return states, latency
def bench_city_doc(args):
    """Time city-JSON extraction on the backend named by ``args.backend``.

    One request is built per record of the JSONL file at ``args.data_path``
    (its "document" field), capped at ``args.num_jsons``. Requests run
    sequentially with a progress bar when ``args.parallel == 1`` and on a
    thread pool otherwise.

    Returns:
        ``(states, latency)``: the per-request results (generated text for
        "vllm", the resulting ``lm`` object for "guidance") and the elapsed
        wall-clock seconds.

    Raises:
        ValueError: If ``args.backend`` is neither "vllm" nor "guidance".
    """
    arguments = [{"document": row["document"]} for row in read_jsonl(args.data_path)]
    arguments = arguments[: args.num_jsons]
    states = [None] * len(arguments)

    # Pick the per-request worker for the chosen backend.
    if args.backend == "vllm":
        url = f"{args.host}:{args.port}/generate"
        generate = partial(call_generate_outlines, url=url, temperature=0)

        def get_one_answer(i):
            states[i] = city_gen(**arguments[i], generate=generate)

    elif args.backend == "guidance":
        model = guidance.models.LlamaCpp(
            args.llama_cpp_model_path,
            n_gpu_layers=-1,
            n_ctx=4096,
        )

        def get_one_answer(i):
            states[i] = model + city_maker(**arguments[i])

    else:
        raise ValueError(f"Invalid backend: {args.backend}")

    indices = range(len(arguments))
    tic = time.time()
    if args.parallel == 1:
        for i in tqdm(indices):
            get_one_answer(i)
    else:
        with ThreadPoolExecutor(args.parallel) as executor:
            # Drain the result iterator so that worker exceptions surface here.
            list(executor.map(get_one_answer, indices))
    latency = time.time() - tic

    return states, latency
def main(args):
    """Run the selected benchmark, print its latency and record the result.

    Args:
        args: Parsed command-line namespace. Reads ``mode``, ``backend``,
            ``num_jsons``, ``parallel`` and ``result_file``. ``data_path``
            is filled in with the mode's default file ("dataset.txt" or
            "questions.jsonl") only when the caller did not supply one.

    Raises:
        ValueError: If ``args.mode`` is not "character" or "city".
    """
    if args.mode == "character":
        default_path, bench = "dataset.txt", bench_character
    elif args.mode == "city":
        default_path, bench = "questions.jsonl", bench_city_doc
    else:
        raise ValueError(f"Invalid mode: {args.mode}")

    # Respect an explicit --data-path; previously it was always overwritten.
    if getattr(args, "data_path", None) is None:
        args.data_path = default_path
    states, latency = bench(args)

    print(f"Latency: {latency:.3f}")

    # Write results: raw states to a text dump, one summary line to the result file.
    dump_state_text(f"tmp_output_{args.backend}_{args.mode}.txt", states)

    with open(args.result_file, "a") as fout:
        value = {
            "task": "json_jump_forward",
            "backend": args.backend,
            "latency": round(latency, 3),
            "num_jsons": args.num_jsons,
            "mode": args.mode,
            "parallel": args.parallel,
        }
        fout.write(json.dumps(value) + "\n")
if __name__ == "__main__":
    # Options specific to this benchmark. The remaining options read by the
    # benchmark (backend, host, port, ...) are presumably registered by
    # add_common_other_args_and_parse, which also does the parsing.
    cli = argparse.ArgumentParser()
    cli.add_argument("--data-path", type=str)
    cli.add_argument("--num-jsons", type=int, default=50)
    cli.add_argument(
        "--mode",
        type=str,
        default="character",
        choices=["character", "city"],
    )
    cli.add_argument(
        "--llama-cpp-model-path",
        type=str,
        default="/home/ubuntu/model_weights/Llama-2-7b-chat-hf/ggml-model-f16.gguf",
    )
    main(add_common_other_args_and_parse(cli))

View File

@@ -0,0 +1,143 @@
import argparse
import json
import time
import sglang as sgl
from sglang.test.test_utils import (
add_common_sglang_args_and_parse,
select_sglang_backend,
)
from sglang.utils import dump_state_text, read_jsonl
# Hand-written regex for the character JSON object. The regex generated from
# the pydantic model (build_regex_from_object) ran into FSM bugs, so a plain
# string pattern is used instead. Adjacent literals are concatenated by the
# parser; each one describes a single JSON line.
character_regex = (
    r'\{\n'
    r' "name": "[\w\d\s]{1,16}",\n'
    r' "house": "(Gryffindor|Slytherin|Ravenclaw|Hufflepuff)",\n'
    r' "blood status": "(Pure-blood|Half-blood|Muggle-born)",\n'
    r' "occupation": "(student|teacher|auror|ministry of magic|death eater|order of the phoenix)",\n'
    r' "wand": \{\n'
    r' "wood": "[\w\d\s]{1,16}",\n'
    r' "core": "[\w\d\s]{1,16}",\n'
    r' "length": [0-9]{1,2}\.[0-9]{0,2}\n'
    r' \},\n'
    r' "alive": "(Alive|Deceased)",\n'
    r' "patronus": "[\w\d\s]{1,16}",\n'
    r' "bogart": "[\w\d\s]{1,16}"\n'
    r'\}'
)
# Hand-written regex for the city JSON object; adjacent literals are
# concatenated by the parser, one literal per JSON line.
city_regex = (
    r'\{\n'
    r' "name": "[\w\d\s]{1,16}",\n'
    r' "country": "[\w\d\s]{1,16}",\n'
    r' "latitude": [-+]?[0-9]*\.?[0-9]{0,2},\n'
    r' "population": [-+]?[0-9]{1,9},\n'
    r' "top 3 landmarks": \["[\w\d\s]{1,16}", "[\w\d\s]{1,16}", "[\w\d\s]{1,16}"\]\n'
    r'\}'
)
# fmt: off
@sgl.function
def character_gen(s, name):
    """Append the character prompt for ``name`` to ``s``, then a regex-constrained
    generation stored under the name "json_output"."""
    prompt = name + " is a character in Harry Potter. Please fill in the following information about this character.\n"
    s += prompt
    s += sgl.gen("json_output", max_tokens=256, regex=character_regex)
# fmt: on
# fmt: off
@sgl.function
def city_gen(s, document):
    """Append the city-extraction prompt for ``document`` to ``s``, then a
    regex-constrained generation stored under the name "json_output"."""
    header = "Please extract the information of a city from the following wikipedia page.\n"
    page = "Page begin.\n" + document + "Page end.\n"
    request = "Here is the name, country, and symbol of the city in JSON format.\n"
    # Appended piece by piece, in the same order as the prompt reads.
    s += header
    s += page
    s += request
    s += sgl.gen("json_output", max_tokens=256, regex=city_regex)
# fmt: on
def bench_city_doc(args):
    """Time batched city-JSON extraction through sglang.

    One request is built per record of the JSONL file at ``args.data_path``
    (its "document" field), capped at ``args.num_jsons``.

    Returns:
        ``(states, latency)``: whatever ``city_gen.run_batch`` returns and
        the elapsed wall-clock seconds of that call.
    """
    documents = [{"document": row["document"]} for row in read_jsonl(args.data_path)]
    arguments = documents[: args.num_jsons]

    # Route all sgl programs to the backend chosen on the command line.
    sgl.set_default_backend(select_sglang_backend(args))

    # A progress bar is only shown for single-threaded runs.
    show_progress = args.parallel == 1

    started = time.time()
    states = city_gen.run_batch(
        arguments,
        temperature=0,
        num_threads=args.parallel,
        progress_bar=show_progress,
    )
    latency = time.time() - started

    return states, latency
def bench_character(args):
    """Time batched character-JSON generation through sglang.

    One request is built per line of ``args.data_path`` (whitespace
    stripped), capped at ``args.num_jsons``.

    Returns:
        ``(states, latency)``: whatever ``character_gen.run_batch`` returns
        and the elapsed wall-clock seconds of that call.
    """
    with open(args.data_path, "r") as f:
        names = [{"name": line.strip()} for line in f]
    arguments = names[: args.num_jsons]

    # Route all sgl programs to the backend chosen on the command line.
    sgl.set_default_backend(select_sglang_backend(args))

    # A progress bar is only shown for single-threaded runs.
    show_progress = args.parallel == 1

    started = time.time()
    states = character_gen.run_batch(
        arguments,
        temperature=0,
        num_threads=args.parallel,
        progress_bar=show_progress,
    )
    latency = time.time() - started

    return states, latency
def main(args):
    """Run the selected sglang benchmark, print its latency and record the result.

    Args:
        args: Parsed command-line namespace. Reads ``mode``, ``backend``,
            ``num_jsons``, ``parallel`` and ``result_file``. ``data_path``
            is filled in with the mode's default file ("dataset.txt" or
            "questions.jsonl") only when the caller did not supply one.

    Raises:
        ValueError: If ``args.mode`` is not "character" or "city".
    """
    if args.mode == "character":
        default_path, bench = "dataset.txt", bench_character
    elif args.mode == "city":
        default_path, bench = "questions.jsonl", bench_city_doc
    else:
        raise ValueError(f"Invalid mode: {args.mode}")

    # Respect an explicit --data-path; previously it was always overwritten.
    if getattr(args, "data_path", None) is None:
        args.data_path = default_path
    states, latency = bench(args)

    print(f"Latency: {latency:.3f}")

    # Write results: raw states, then the generated JSON (one per line),
    # then one summary line appended to the result file.
    dump_state_text(f"tmp_output_{args.backend}_{args.mode}.txt", states)
    with open(f"{args.backend}_{args.mode}.json", "w") as fout:
        for state in states:
            fout.write(state["json_output"] + "\n")

    with open(args.result_file, "a") as fout:
        value = {
            "task": "json_jump_forward",
            "backend": args.backend,
            "latency": round(latency, 3),
            "num_jsons": args.num_jsons,
            "mode": args.mode,
            "parallel": args.parallel,
        }
        fout.write(json.dumps(value) + "\n")
if __name__ == "__main__":
    # Options specific to this benchmark. The remaining options read by the
    # benchmark (backend, parallel, result file, ...) are presumably
    # registered by add_common_sglang_args_and_parse, which also parses.
    cli = argparse.ArgumentParser()
    cli.add_argument("--data-path", type=str)
    cli.add_argument("--num-jsons", type=int, default=50)
    cli.add_argument(
        "--mode",
        type=str,
        default="character",
        choices=["character", "city"],
    )
    main(add_common_sglang_args_and_parse(cli))

View File

@@ -0,0 +1,58 @@
import json
import transformers
import wikipedia
# Hugging Face model id whose tokenizer is used to measure document length.
model_path = "meta-llama/Llama-2-7b-chat-hf"
# Loaded once at import time; get_content() uses it to count tokens.
t = transformers.AutoTokenizer.from_pretrained(model_path)
# Wikipedia page titles to fetch, one document per city.
# "los angles" was a misspelling of "los angeles".
city_names = [
    "los angeles",
    "london",
    "tokyo",
    "beijing",
    "singapore",
    "paris",
    "dubai",
    "sydney",
    "moscow",
    "rome",
    "toronto",
    "rio de janeiro",
    "istanbul",
    "berlin",
    "auckland",
    "buenos aires",
    "mexico city",
    "mumbai",
    "seoul",
    "bangkok",
    "cairo",
    "athens",
    "jerusalem",
]
def get_content(city_name, expected_tokens=3000):
    """Fetch a city's Wikipedia page and cut it to roughly ``expected_tokens`` tokens.

    The page text is shortened by characters, in proportion to the ratio
    between the token budget and the page's token count, so the resulting
    token count is approximate. Both counts are printed.

    Args:
        city_name: Title passed to ``wikipedia.page``.
        expected_tokens: Approximate token budget for the returned text.

    Returns:
        The (possibly truncated) page text.
    """
    content = str(wikipedia.page(city_name).content)
    # Collapse blank lines between paragraphs.
    content = content.replace("\n\n", "\n")

    tokens = t.encode(content)
    # max(..., 1) avoids a ZeroDivisionError should the tokenizer return
    # no tokens; a slice end beyond len(content) simply keeps the whole text.
    truncate_len = int((expected_tokens / max(len(tokens), 1)) * len(content))
    truncate_content = content[:truncate_len]
    truncate_tokens = t.encode(truncate_content)

    # Count token
    print(
        f"city_name: {city_name}, #tokens: {len(tokens)}, #truncate tokens: {len(truncate_tokens)}"
    )

    return truncate_content
if __name__ == "__main__":
    # Emit one JSON record per city, in list order, to questions.jsonl.
    with open("questions.jsonl", "w") as fout:
        for name in city_names:
            record = {"document": get_content(name)}
            fout.write(json.dumps(record) + "\n")

View File

@@ -0,0 +1,50 @@
Harry Potter
Hermione Granger
Ron Weasley
Albus Dumbledore
Severus Snape
Rubeus Hagrid
Draco Malfoy
Ginny Weasley
Fred Weasley
George Weasley
Percy Weasley
Sirius Black
Remus Lupin
Neville Longbottom
Luna Lovegood
Cedric Diggory
Cho Chang
Lord Voldemort
Minerva McGonagall
Filius Flitwick
Dolores Umbridge
Bellatrix Lestrange
Lucius Malfoy
Molly Weasley
Arthur Weasley
Nymphadora Tonks
Dobby
Moaning Myrtle
Peter Pettigrew
Alastor 'Mad-Eye' Moody
Horace Slughorn
Vernon Dursley
Petunia Dursley
Dudley Dursley
Argus Filch
Sybill Trelawney
Gilderoy Lockhart
Fleur Delacour
Viktor Krum
Bill Weasley
Oliver Wood
Cornelius Fudge
Barty Crouch Sr.
Barty Crouch Jr.
Kingsley Shacklebolt
Quirinus Quirrell
Nearly Headless Nick
Aunt Marge
Griphook
Ludo Bagman