"""
Usage:
***Note: for speculative execution to work, the user must put all "gen" in "assistant".
Show in "assistant" the desired answer format. Each "gen" term should have a stop token.
The stream mode is not supported in speculative execution.

E.g.
correct:
sgl.assistant("\nName: " + sgl.gen("name", stop="\n") + "\nBirthday: " + sgl.gen("birthday", stop="\n") + "\nJob: " + sgl.gen("job", stop="\n"))

incorrect:
s += sgl.assistant("\nName: " + sgl.gen("name", stop="\n"))
s += sgl.assistant("\nBirthday: " + sgl.gen("birthday", stop="\n"))
s += sgl.assistant("\nJob: " + sgl.gen("job", stop="\n"))

export OPENAI_API_KEY=sk-******
python3 openai_chat_speculative.py
"""
import sglang as sgl
from sglang import OpenAI, function, set_default_backend
@function(num_api_spec_tokens=256)
def gen_character_spec(s):
    """Speculatively generate a character card with a few-shot format example.

    All gen() calls live inside a single sgl.assistant() and each has a stop
    token, as speculative execution requires (see the module docstring).
    """
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user("Construct a character within the following format:")
    # Few-shot example showing the exact layout the speculative draft must follow.
    s += sgl.assistant(
        "Name: Steve Jobs.\nBirthday: February 24, 1955.\nJob: Apple CEO.\n"
    )
    s += sgl.user("Please generate new Name, Birthday and Job.\n")
    s += sgl.assistant(
        "Name: "
        + sgl.gen("name", stop="\n")
        + "\nBirthday: "
        + sgl.gen("birthday", stop="\n")
        + "\nJob: "
        + sgl.gen("job", stop="\n")
    )
@function(num_api_spec_tokens=256)
def gen_character_spec_no_few_shot(s):
    """Speculatively generate a character card WITHOUT a few-shot example.

    With no format example, the speculative draft is less likely to match the
    requested layout, so answers may come out incomplete or unreasonable.
    """
    s += sgl.user("Construct a character. For each field stop with a newline\n")
    s += sgl.assistant(
        "Name: "
        + sgl.gen("name", stop="\n")
        + "\nAge: "
        + sgl.gen("age", stop="\n")
        + "\nJob: "
        + sgl.gen("job", stop="\n")
    )
@function
def gen_character_normal(s):
    """Baseline program without speculative execution: one plain gen() call."""
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user("What's the answer of 23 + 8?")
    reply = sgl.gen("answer", max_tokens=64)
    s += sgl.assistant(reply)
@function(num_api_spec_tokens=1024)
def multi_turn_question(s, question_1, question_2):
    """Speculatively answer two questions in one assistant turn.

    A few-shot Q/A pair pins the answer layout; both gen() calls share one
    sgl.assistant() block with stop tokens so they can be drafted speculatively.
    """
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user("Answer questions in the following format:")
    s += sgl.user(
        "Question 1: What is the capital of France?\nQuestion 2: What is the population of this city?\n"
    )
    s += sgl.assistant(
        "Answer 1: The capital of France is Paris.\nAnswer 2: The population of Paris in 2024 is estimated to be around 2.1 million for the city proper.\n"
    )
    s += sgl.user("Question 1: " + question_1 + "\nQuestion 2: " + question_2)
    s += sgl.assistant(
        "Answer 1: "
        + sgl.gen("answer_1", stop="\n")
        + "\nAnswer 2: "
        + sgl.gen("answer_2", stop="\n")
    )
def test_spec_single_turn():
    """Run the few-shot speculative program; print fields and token usage.

    Expects a reasonable value for each generated field.
    """
    # `backend` is the module-level OpenAI backend created under __main__.
    backend.token_usage.reset()

    state = gen_character_spec.run()
    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- name:", state["name"])
    print("-- birthday:", state["birthday"])
    print("-- job:", state["job"])
    print(backend.token_usage)
def test_inaccurate_spec_single_turn():
    """Run the no-few-shot speculative program and dump transcript and fields.

    Without a format example, expect incomplete or unreasonable answers.
    """
    state = gen_character_spec_no_few_shot.run()
    for message in state.messages():
        print(message["role"], ":", message["content"])
    for field in ("name", "age", "job"):
        print(f"\n-- {field}:", state[field])
def test_normal_single_turn():
    """Run the plain (non-speculative) program and print the chat transcript."""
    state = gen_character_normal.run()
    for message in state.messages():
        print(message["role"], ":", message["content"])
def test_spec_multi_turn():
    """Run the two-question speculative program and print both answers.

    Expects answers formatted like the few-shot example in the program.
    """
    questions = {
        "question_1": "What is the capital of the United States?",
        "question_2": "List two local attractions in the capital of the United States.",
    }
    state = multi_turn_question.run(**questions)

    for message in state.messages():
        print(message["role"], ":", message["content"])
    print("\n-- answer_1 --\n", state["answer_1"])
    print("\n-- answer_2 --\n", state["answer_2"])
def test_spec_multi_turn_stream():
    """Request streaming output, which speculative execution does not support.

    Expected to surface an error from the stream executor.
    """
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )
    for chunk in state.text_iter():
        print(chunk, end="", flush=True)
if __name__ == "__main__":
    # Requires OPENAI_API_KEY in the environment (see module docstring).
    backend = OpenAI("gpt-4-turbo")
    set_default_backend(backend)

    print("\n========== test spec single turn ==========\n")
    # expect reasonable answer for each field
    test_spec_single_turn()

    print("\n========== test inaccurate spec single turn ==========\n")
    # expect incomplete or unreasonable answers
    test_inaccurate_spec_single_turn()

    print("\n========== test normal single turn ==========\n")
    # expect reasonable answer
    test_normal_single_turn()

    print("\n========== test spec multi turn ==========\n")
    # expect answer with same format as in the few shot
    test_spec_multi_turn()

    print("\n========== test spec multi turn stream ==========\n")
    # expect error in stream_executor: stream is not supported...
    test_spec_multi_turn_stream()