2024-07-31 04:40:51 +08:00
"""
Usage :
2024-09-28 14:43:35 -07:00
2024-07-31 04:40:51 +08:00
python - m sglang . launch_server - - model - path meta - llama / Llama - 2 - 7 b - chat - hf - - port 30000
python openai_batch_chat . py
2024-09-28 14:43:35 -07:00
2024-07-31 04:40:51 +08:00
Note : Before running this script ,
you should create the input . jsonl file with the following content :
{ " custom_id " : " request-1 " , " method " : " POST " , " url " : " /v1/chat/completions " , " body " : { " model " : " gpt-3.5-turbo-0125 " , " messages " : [ { " role " : " system " , " content " : " You are a helpful assistant. " } , { " role " : " user " , " content " : " Hello world! List 3 NBA players and tell a story " } ] , " max_tokens " : 300 } }
{ " custom_id " : " request-2 " , " method " : " POST " , " url " : " /v1/chat/completions " , " body " : { " model " : " gpt-3.5-turbo-0125 " , " messages " : [ { " role " : " system " , " content " : " You are an assistant. " } , { " role " : " user " , " content " : " Hello world! List three capital and tell a story " } ] , " max_tokens " : 500 } }
"""
2024-07-30 04:07:18 +08:00
import json
import time
import openai
class OpenAIBatchProcessor :
2024-09-28 14:43:35 -07:00
def __init__ ( self ) :
2024-07-30 04:07:18 +08:00
client = openai . Client ( base_url = " http://127.0.0.1:30000/v1 " , api_key = " EMPTY " )
self . client = client
def process_batch ( self , input_file_path , endpoint , completion_window ) :
# Upload the input file
with open ( input_file_path , " rb " ) as file :
uploaded_file = self . client . files . create ( file = file , purpose = " batch " )
# Create the batch job
batch_job = self . client . batches . create (
input_file_id = uploaded_file . id ,
endpoint = endpoint ,
completion_window = completion_window ,
)
# Monitor the batch job status
while batch_job . status not in [ " completed " , " failed " , " cancelled " ] :
time . sleep ( 3 ) # Wait for 3 seconds before checking the status again
print (
f " Batch job status: { batch_job . status } ...trying again in 3 seconds... "
)
batch_job = self . client . batches . retrieve ( batch_job . id )
# Check the batch job status and errors
if batch_job . status == " failed " :
print ( f " Batch job failed with status: { batch_job . status } " )
print ( f " Batch job errors: { batch_job . errors } " )
return None
# If the batch job is completed, process the results
if batch_job . status == " completed " :
# print result of batch job
print ( " batch " , batch_job . request_counts )
result_file_id = batch_job . output_file_id
# Retrieve the file content from the server
file_response = self . client . files . content ( result_file_id )
result_content = file_response . read ( ) # Read the content of the file
# Save the content to a local file
result_file_name = " batch_job_chat_results.jsonl "
with open ( result_file_name , " wb " ) as file :
file . write ( result_content ) # Write the binary content to the file
# Load data from the saved JSONL file
results = [ ]
with open ( result_file_name , " r " , encoding = " utf-8 " ) as file :
for line in file :
json_object = json . loads (
line . strip ( )
) # Parse each line as a JSON object
results . append ( json_object )
return results
else :
print ( f " Batch job failed with status: { batch_job . status } " )
return None
# Initialize the OpenAIBatchProcessor
2024-09-28 14:43:35 -07:00
processor = OpenAIBatchProcessor ( )
2024-07-30 04:07:18 +08:00
# Process the batch job
input_file_path = " input.jsonl "
endpoint = " /v1/chat/completions "
completion_window = " 24h "
# Process the batch job
results = processor . process_batch ( input_file_path , endpoint , completion_window )
# Print the results
print ( results )