sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct

This commit is contained in:
maxiao1
2025-09-13 17:00:20 +08:00
commit 118f1fc726
2037 changed files with 515371 additions and 0 deletions

View File

@@ -0,0 +1,35 @@
"""
This example demonstrates how to use `min_tokens` to force `sgl.gen` to generate a longer sequence.
Usage:
python3 sgl_gen_min_tokens.py
"""
import sglang as sgl
@sgl.function
def long_answer(s):
    """Ask for the US capital and request a length-bounded answer.

    The generation is stored under the key "answer" and is requested with
    ``min_tokens=64`` and ``max_tokens=128``.
    """
    s += sgl.user("What is the capital of the United States?")
    # Build the generation call separately so its length bounds stand out.
    bounded_answer = sgl.gen("answer", min_tokens=64, max_tokens=128)
    s += sgl.assistant(bounded_answer)
@sgl.function
def short_answer(s):
    """Ask for the US capital without passing any length bounds to ``sgl.gen``.

    The generation is stored under the key "answer".
    """
    s += sgl.user("What is the capital of the United States?")
    # No min_tokens / max_tokens here: this is the baseline for comparison.
    unbounded_answer = sgl.gen("answer")
    s += sgl.assistant(unbounded_answer)
if __name__ == "__main__":
    # Create the runtime for the model and register it as the default backend
    # used by the `.run()` calls below.
    runtime = sgl.Runtime(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct")
    try:
        sgl.set_default_backend(runtime)

        # Answer generated with min_tokens=64 / max_tokens=128.
        state = long_answer.run()
        print("=" * 20)
        print("Longer Answer", state["answer"])

        # Answer generated without explicit length bounds, for comparison.
        state = short_answer.run()
        print("=" * 20)
        print("Short Answer", state["answer"])
    finally:
        # Shut the runtime down even if generation or result lookup raises,
        # so a failed run does not leave it alive.
        runtime.shutdown()