diff --git a/benchmark/latency_throughput/README.md b/benchmark/latency_throughput/README.md
index 32d2aa6d0..52b70664c 100644
--- a/benchmark/latency_throughput/README.md
+++ b/benchmark/latency_throughput/README.md
@@ -19,7 +19,7 @@ python3 -m vllm.entrypoints.api_server --model meta-llama/Llama-2-7b-chat-hf --d
 ```
 
 ```
-python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --dataset ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10 --request-rate 10
+python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --dataset ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10 --request-rate 10 --port 21000
 ```
 
 
diff --git a/examples/usage/parallel_sample.py b/examples/usage/parallel_sample.py
new file mode 100644
index 000000000..ff5a86cbc
--- /dev/null
+++ b/examples/usage/parallel_sample.py
@@ -0,0 +1,40 @@
+import sglang as sgl
+
+
+@sgl.function
+def parallel_sample(s, question, n):
+    s += (
+        "Question: Compute 1 + 2 + 3\n"
+        "Reasoning: I need to use a calculator.\n"
+        "Tool: calculator\n"
+        "Answer: 6\n"
+
+        "Question: Compute 3 + 2 + 2\n"
+        "Reasoning: I will try a calculator.\n"
+        "Tool: calculator\n"
+        "Answer: 7\n"
+    )
+    s += "Question: " + question + "\n"
+    forks = s.fork(n)
+    forks += "Reasoning:" + sgl.gen("reasoning", stop="\n") + "\n"
+    forks += "Tool:" + sgl.gen("tool", choices=["calculator", "browser"]) + "\n"
+    forks += "Answer:" + sgl.gen("answer", stop="\n") + "\n"
+    forks.join()
+
+
+sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
+#sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
+
+state = parallel_sample.run(
+    question="Compute 5 + 2 + 4.",
+    n=5,
+    temperature=1.0
+)
+
+for i in range(5):
+    obj = {
+        "reasoning": state["reasoning"][i],
+        "tool": state["tool"][i],
+        "answer": state["answer"][i],
+    }
+    print(f"[{i}], {obj}")
diff --git a/python/sglang/api.py b/python/sglang/api.py
index 208077941..410cb6fb4 100644
--- a/python/sglang/api.py
+++ b/python/sglang/api.py
@@ -50,7 +50,7 @@ def gen(
     regex: Optional[str] = None,
 ):
     if choices:
-        return SglSelect(name, choices, temperature)
+        return SglSelect(name, choices, 0.0 if temperature is None else temperature)
 
     # check regex is valid
     if regex is not None: