# 2024-01-08 04:37:50 +00:00
import argparse
import json
import time
# 2024-04-28 21:06:22 +08:00
from concurrent . futures import ThreadPoolExecutor
# 2024-01-08 04:37:50 +00:00
from tqdm import tqdm
# 2024-04-28 21:06:22 +08:00
# 2024-05-05 16:14:17 +08:00
from sglang . test . test_utils import add_common_other_args_and_parse , get_call_generate
# 2024-04-28 21:06:22 +08:00
from sglang . utils import dump_state_text , read_jsonl
# 2024-01-08 04:37:50 +00:00
def get_prompt(question):
    """Build the few-shot ReAct prompt for a single question.

    The prompt is a fixed instruction paragraph describing the three action
    types (Search, Lookup, Finish) followed by six worked examples, each a
    question with numbered Thought / Action / Observation steps that ends in
    a Finish action.

    Args:
        question: Text appended verbatim after the final newline of the
            few-shot block. No label or separator is added here, so the
            caller controls how the question is introduced.

    Returns:
        The few-shot text concatenated with ``question``.
    """
    # NOTE(review): the spacing around punctuation in the examples below
    # (e.g. "Thought 1 : ") and the "â" in the Pavel Urysohn observation look
    # like extraction/encoding damage. They are kept verbatim because the
    # exact prompt bytes influence model output — confirm against the
    # upstream prompt before normalizing.
    return (
        """ Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types:
( 1 ) Search [ entity ] , which searches the exact entity on Wikipedia and returns the first paragraph if it exists . If not , it will return some similar entities to search .
( 2 ) Lookup [ keyword ] , which returns the next sentence containing keyword in the current passage .
( 3 ) Finish [ answer ] , which returns the answer and finishes the task .
Here are some examples .
Question : What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into ?
Thought 1 : I need to search Colorado orogeny , find the area that the eastern sector of the Colorado orogeny extends into , then find the elevation range of the area .
Action 1 : Search [ Colorado orogeny ]
Observation 1 : The Colorado orogeny was an episode of mountain building ( an orogeny ) in Colorado and surrounding areas .
Thought 2 : It does not mention the eastern sector . So I need to look up eastern sector .
Action 2 : Lookup [ eastern sector ]
Observation 2 : ( Result 1 / 1 ) The eastern sector extends into the High Plains and is called the Central Plains orogeny .
Thought 3 : The eastern sector of Colorado orogeny extends into the High Plains . So I need to search High Plains and find its elevation range .
Action 3 : Search [ High Plains ]
Observation 3 : High Plains refers to one of two distinct land regions :
Thought 4 : I need to instead search High Plains ( United States ) .
Action 4 : Search [ High Plains ( United States ) ]
Observation 4 : The High Plains are a subregion of the Great Plains . From east to west , the High Plains rise in elevation from around 1 , 800 to 7 , 000 ft ( 550 to 2 , 130 m ) . [ 3 ]
Thought 5 : High Plains rise in elevation from around 1 , 800 to 7 , 000 ft , so the answer is 1 , 800 to 7 , 000 ft .
Action 5 : Finish [ 1 , 800 to 7 , 000 ft ]
Question : Musician and satirist Allie Goertz wrote a song about the " The Simpsons " character Milhouse , who Matt Groening named after who ?
Thought 1 : The question simplifies to " The Simpsons " character Milhouse is named after who . I only need to search Milhouse and find who it is named after .
Action 1 : Search [ Milhouse ]
Observation 1 : Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening .
Thought 2 : The paragraph does not tell who Milhouse is named after , maybe I can look up " named after " .
Action 2 : Lookup [ named after ]
Observation 2 : ( Result 1 / 1 ) Milhouse was named after U . S . president Richard Nixon , whose middle name was Milhous .
Thought 3 : Milhouse was named after U . S . president Richard Nixon , so the answer is Richard Nixon .
Action 3 : Finish [ Richard Nixon ]
Question : Which documentary is about Finnish rock groups , Adam Clayton Powell or The Saimaa Gesture ?
Thought 1 : I need to search Adam Clayton Powell and The Saimaa Gesture , and find which documentary is about Finnish rock groups .
Action 1 : Search [ Adam Clayton Powell ]
Observation 1 : Could not find [ Adam Clayton Powell ] . Similar : [ ' Adam Clayton Powell III ' , ' Seventh Avenue (Manhattan) ' , ' Adam Clayton Powell Jr. State Office Building ' , ' Isabel Washington Powell ' , ' Adam Powell ' , ' Adam Clayton Powell (film) ' , ' Giancarlo Esposito ' ] .
Thought 2 : To find the documentary , I can search Adam Clayton Powell ( film ) .
Action 2 : Search [ Adam Clayton Powell ( film ) ]
Observation 2 : Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg .
The film is about the rise and fall of influential African - American politician Adam Clayton Powell Jr . [ 3 ] [ 4 ] It was later aired as part of the PBS series The American Experience .
Thought 3 : Adam Clayton Powell ( film ) is a documentary about an African - American politician , not Finnish rock groups . So the documentary about Finnish rock groups must instead be The Saimaa Gesture .
Action 3 : Finish [ The Saimaa Gesture ]
Question : What profession does Nicholas Ray and Elia Kazan have in common ?
Thought 1 : I need to search Nicholas Ray and Elia Kazan , find their professions , then find the profession they have in common .
Action 1 : Search [ Nicholas Ray ]
Observation 1 : Nicholas Ray ( born Raymond Nicholas Kienzle Jr . , August 7 , 1911 – June 16 , 1979 ) was an American film director , screenwriter , and actor best known for the 1955 film Rebel Without a Cause .
Thought 2 : Professions of Nicholas Ray are director , screenwriter , and actor . I need to search Elia Kazan next and find his professions .
Action 2 : Search [ Elia Kazan ]
Observation 2 : Elia Kazan was an American film and theatre director , producer , screenwriter and actor .
Thought 3 : Professions of Elia Kazan are director , producer , screenwriter , and actor . So profession Nicholas Ray and Elia Kazan have in common is director , screenwriter , and actor .
Action 3 : Finish [ director , screenwriter , actor ]
Question : Which magazine was started first Arthur ' s Magazine or First for Women?
Thought 1 : I need to search Arthur ' s Magazine and First for Women, and find which was started first.
Action 1 : Search [ Arthur ' s Magazine]
Observation 1 : Arthur ' s Magazine (1844-1846) was an American literary periodical published in Philadelphia in the 19th century.
Thought 2 : Arthur ' s Magazine was started in 1844. I need to search First for Women next.
Action 2 : Search [ First for Women ]
Observation 2 : First for Women is a woman ' s magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989.
Thought 3 : First for Women was started in 1989. 1844 ( Arthur ' s Magazine) < 1989 (First for Women), so Arthur ' s Magazine was started first .
Action 3 : Finish [ Arthur ' s Magazine]
Question : Were Pavel Urysohn and Leonid Levin known for the same type of work ?
Thought 1 : I need to search Pavel Urysohn and Leonid Levin , find their types of work , then find if they are the same .
Action 1 : Search [ Pavel Urysohn ]
Observation 1 : Pavel Samuilovich Urysohn ( February 3 , 1898 â August 17 , 1924 ) was a Soviet mathematician who is best known for his contributions in dimension theory .
Thought 2 : Pavel Urysohn is a mathematician . I need to search Leonid Levin next and find its type of work .
Action 2 : Search [ Leonid Levin ]
Observation 2 : Leonid Anatolievich Levin is a Soviet - American mathematician and computer scientist .
Thought 3 : Leonid Levin is a mathematician and computer scientist . So Pavel Urysohn and Leonid Levin have the same type of work .
Action 3 : Finish [ yes ]
"""
        + question
    )
def main(args):
    """Replay recorded ReAct trajectories against a backend and log latency.

    The first ``args.num_questions`` records returned by
    ``read_jsonl(args.data_path)`` are expanded into one agent run per
    key/value pair: the key is the question and the value is its list of
    recorded steps ("triplets"), each indexed by ``"thought"``, ``"action"``
    and ``"observation"``.

    Each agent starts from ``get_prompt(question)`` and issues one generation
    request per recorded step plus one final request. After every non-final
    request the *recorded* step (not the generated text) is appended to the
    prompt, so the request sequence is fixed by the data file; only the last
    generated answer is kept.

    Side effects: prints the total latency, passes all collected prompts and
    final answers to ``dump_state_text`` under ``tmp_output_<backend>.txt``,
    and appends one JSON summary line to ``args.result_file``.

    Args:
        args: Parsed command-line namespace. This function reads
            ``data_path``, ``num_questions``, ``backend``, ``parallel`` and
            ``result_file``; the namespace is also handed to
            ``get_call_generate`` to pick the generation callable.

    Raises:
        ValueError: On the ``lmql`` path when ``args.parallel`` is below 1.
    """
    records = read_jsonl(args.data_path)[: args.num_questions]
    arguments = [
        {"question": question, "triplets": triplets}
        for record in records
        for question, triplets in record.items()
    ]

    # Shared by every agent run; holds each prompt sent plus each final answer.
    states = []

    # Select backend
    call_generate = get_call_generate(args)

    def iter_step_prompts(argument):
        """Yield, in order, every prompt that must be sent for one agent."""
        triplets = argument["triplets"]
        prompt = get_prompt(argument["question"])
        for i in range(1, len(triplets) + 2):
            prompt += "Thought " + str(i) + ":"
            yield prompt
            if i > len(triplets):
                # Final request: there is no recorded step left to replay.
                return
            step = triplets[i - 1]
            prompt += (
                step["thought"]
                + "\nAction "
                + str(i)
                + ":"
                + step["action"]
                + "\nObservation "
                + str(i)
                + ":"
                + step["observation"]
                + "\n"
            )

    def run_single_agent(argument):
        """Run one agent with a blocking ``call_generate``."""
        answer = None
        for prompt in iter_step_prompts(argument):
            states.append(prompt)
            answer = call_generate(
                prompt, max_tokens=200, temperature=0, stop="Observation"
            )
        states.append(answer)

    async def run_single_agent_async(argument):
        """Run one agent with an awaitable ``call_generate`` (lmql path)."""
        answer = None
        for prompt in iter_step_prompts(argument):
            states.append(prompt)
            answer = await call_generate(
                prompt, max_tokens=200, temperature=0, stop="Observation", max_len=4096
            )
        states.append(answer)

    async def run_batch(batch):
        """Run all agents of one batch concurrently inside the running loop."""
        await asyncio.gather(*(run_single_agent_async(arg) for arg in batch))

    tic = time.perf_counter()

    if args.backend != "lmql":
        if args.parallel == 1:
            for arg in tqdm(arguments):
                run_single_agent(arg)
        else:
            with ThreadPoolExecutor(args.parallel) as executor:
                # list() drains the lazy map so every agent actually finishes.
                list(
                    tqdm(
                        executor.map(run_single_agent, arguments),
                        total=len(arguments),
                    )
                )
    else:
        import asyncio

        if args.parallel < 1:
            raise ValueError("parallel must be at least 1")

        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # Python 3.14+ no longer creates a loop implicitly.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        # Consecutive chunks of at most `parallel` agents; chunks run one
        # after another, agents within a chunk run concurrently.
        batches = [
            arguments[start : start + args.parallel]
            for start in range(0, len(arguments), args.parallel)
        ]
        for batch in tqdm(batches):
            loop.run_until_complete(run_batch(batch))

    latency = time.perf_counter() - tic

    print(f"Latency: {latency:.3f}")

    # Write results
    dump_state_text(f"tmp_output_{args.backend}.txt", states)

    value = {
        "task": "ReAct Agents",
        "backend": args.backend,
        "num_gpus": 1,  # hard-coded; not derived from args
        "latency": round(latency, 3),
        "num_requests": len(arguments),
        "other": {
            "parallel": args.parallel,
        },
    }
    with open(args.result_file, "a") as fout:
        fout.write(json.dumps(value) + "\n")
if __name__ == "__main__":
    # Script entry point: register the benchmark-specific options, let the
    # shared helper add the remaining options and parse, then run the benchmark.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data-path",
        type=str,
        default="hotpotqa_100.jsonl",
        help="JSONL file read by main() via read_jsonl.",
    )
    parser.add_argument(
        "--num-questions",
        type=int,
        default=10,
        help="Number of leading records of the data file to use.",
    )
    args = add_common_other_args_and_parse(parser)
    main(args)