68 lines
2.2 KiB
Python
68 lines
2.2 KiB
Python
"""
OpenAI-compatible LoRA adapter usage with SGLang.

Server Setup:
    python -m sglang.launch_server \\
        --model meta-llama/Llama-3.1-8B-Instruct \\
        --enable-lora \\
        --lora-paths sql=/path/to/sql python=/path/to/python
"""
|
|
|
|
import openai
|
|
|
|
# Shared client for all examples, aimed at the OpenAI-compatible /v1 endpoint
# on localhost port 30000. "EMPTY" is a placeholder API key.
# NOTE(review): presumably the server is launched without API-key auth — confirm.
client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
|
|
|
|
|
|
def main():
    """Send four sample requests through the module-level ``client``.

    The examples run in order and each prints the text of the first choice:

    1. Chat completion with the adapter named in the ``model`` string.
    2. Text completion with the adapter named in the ``model`` string.
    3. Chat completion with the adapter passed as ``lora_path`` in
       ``extra_body``.
    4. Chat completion against the base model with no adapter.

    Nothing is caught here; any exception raised by a request propagates
    to the caller.
    """
    print("SGLang OpenAI-Compatible LoRA Examples\n")

    # Example 1 (new style): the adapter is selected inside the model string,
    # written as "<base model>:<adapter name>".
    print("1. Chat with LoRA adapter in model parameter:")
    sql_chat_request = {
        "model": "meta-llama/Llama-3.1-8B-Instruct:sql",
        "messages": [{"role": "user", "content": "Convert to SQL: show all users"}],
        "max_tokens": 50,
    }
    sql_chat_reply = client.chat.completions.create(**sql_chat_request)
    sql_chat_text = sql_chat_reply.choices[0].message.content
    print(f" Response: {sql_chat_text}\n")

    # Example 2: same model-string convention, but on the completions endpoint.
    print("2. Completion with LoRA adapter:")
    code_request = {
        "model": "meta-llama/Llama-3.1-8B-Instruct:python",
        "prompt": "def fibonacci(n):",
        "max_tokens": 50,
    }
    code_reply = client.completions.create(**code_request)
    code_text = code_reply.choices[0].text
    print(f" Response: {code_text}\n")

    # Example 3 (old style): plain base-model name, with the adapter passed
    # separately as "lora_path" in extra_body.
    print("3. Backward compatible (explicit lora_path):")
    legacy_request = {
        "model": "meta-llama/Llama-3.1-8B-Instruct",
        "messages": [{"role": "user", "content": "Convert to SQL: show all users"}],
        "extra_body": {"lora_path": "sql"},
        "max_tokens": 50,
    }
    legacy_reply = client.chat.completions.create(**legacy_request)
    legacy_text = legacy_reply.choices[0].message.content
    print(f" Response: {legacy_text}\n")

    # Example 4: base model only, no adapter named anywhere in the request.
    print("4. Base model without adapter:")
    base_request = {
        "model": "meta-llama/Llama-3.1-8B-Instruct",
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 30,
    }
    base_reply = client.chat.completions.create(**base_request)
    base_text = base_reply.choices[0].message.content
    print(f" Response: {base_text}\n")

    print("All examples completed!")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the examples and report any failure.
    try:
        main()
    except Exception as e:
        # Top-level boundary of an example script: show the error and a
        # hint for the likeliest cause, then exit non-zero so shells and CI
        # see the failure instead of a run that looks successful.
        print(f"Error: {e}")
        print(
            "\nEnsure server is running:\n"
            " python -m sglang.launch_server --model ... --enable-lora --lora-paths ..."
        )
        raise SystemExit(1) from e
|