2024-01-08 04:37:50 +00:00
"""
2024-01-29 17:05:42 -08:00
Usage :
2024-01-08 04:37:50 +00:00
python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --port 30000
2024-01-29 17:05:42 -08:00
python3 test_httpserver_llava.py
2024-01-08 04:37:50 +00:00
Output :
The image features a man standing on the back of a yellow taxi cab, holding
"""
import argparse
import asyncio
import json
2024-04-23 22:36:33 +08:00
import time
2024-01-08 04:37:50 +00:00
import aiohttp
import requests
async def send_request(url, data, delay=0):
    """POST ``data`` as JSON to ``url`` and return the decoded JSON reply.

    Args:
        url: Full endpoint URL to post to.
        data: JSON-serializable request body.
        delay: Seconds to sleep before the request is sent (default 0).

    Returns:
        The response body parsed as JSON.
    """
    await asyncio.sleep(delay)
    # A fresh session per call; both the session and the response are
    # released when the combined context exits.
    async with aiohttp.ClientSession() as session, session.post(
        url, json=data
    ) as reply:
        return await reply.json()
async def test_concurrent(args):
    """Fire eight identical image-description requests at once and print each reply.

    Args:
        args: Object with ``host`` and ``port`` attributes; they are joined
            as ``"{host}:{port}"`` to form the server base URL.
    """
    endpoint = f"{args.host}:{args.port}" + "/generate"
    payload = {
        "text": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: <image>\nDescribe this picture ASSISTANT:",
        "image_data": "example_image.png",
        "sampling_params": {
            "temperature": 0,
            "max_new_tokens": 16,
        },
    }

    # Build the coroutines first, then let gather() run them concurrently;
    # results come back in submission order.
    pending = [send_request(endpoint, payload) for _ in range(8)]
    replies = await asyncio.gather(*pending)

    for reply in replies:
        print(reply["text"])
def test_streaming(args):
    """Request one streamed image description and echo it as it arrives.

    Posts to ``/generate`` with ``"stream": True`` and reads the reply line
    by line. Lines starting with ``data:`` carry a JSON object; its
    ``"text"`` field is treated as the full text generated so far, so only
    the part beyond what was already printed is written out. The literal
    line ``data: [DONE]`` ends the stream.

    Args:
        args: Object with ``host`` and ``port`` attributes; they are joined
            as ``"{host}:{port}"`` to form the server base URL.
    """
    url = f"{args.host}:{args.port}"
    payload = {
        "text": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: <image>\nDescribe this picture ASSISTANT:",
        "image_data": "example_image.png",
        "sampling_params": {
            "temperature": 0,
            "max_new_tokens": 128,
        },
        "stream": True,
    }

    # With stream=True the connection is only released once the body is
    # fully consumed or the response is closed. The loop below stops early
    # on [DONE], so close the response explicitly via the context manager.
    with requests.post(url + "/generate", json=payload, stream=True) as response:
        prev = 0  # number of characters already printed
        for chunk in response.iter_lines(decode_unicode=False):
            chunk = chunk.decode("utf-8")
            # Skip keep-alive blank lines and anything that is not a data line.
            if not chunk.startswith("data:"):
                continue
            if chunk == "data: [DONE]":
                break
            data = json.loads(chunk[5:].strip("\n"))
            output = data["text"].strip()
            print(output[prev:], end="", flush=True)
            prev = len(output)
    print("")
if __name__ == "__main__":
    # Command-line options: where the already-running server can be reached.
    cli = argparse.ArgumentParser()
    cli.add_argument("--host", type=str, default="http://127.0.0.1")
    cli.add_argument("--port", type=int, default=30000)
    args = cli.parse_args()

    # Run the concurrent batch first, then the streaming request.
    asyncio.run(test_concurrent(args))
    test_streaming(args)