# Source: joke-finetome-model-gguf-ph…/inference/hf_endpoint_client.py
# (Python, 24 lines, 611 B)

import os
from huggingface_hub import InferenceClient
# Required env vars:
# export HF_TOKEN="..."
# export HF_ENDPOINT_BASE_URL="https://xxxx.endpoints.huggingface.cloud"
# Connection settings come from the environment; a missing variable raises
# KeyError naming the variable that was not set.
endpoint_url = os.environ["HF_ENDPOINT_BASE_URL"]
access_token = os.environ["HF_TOKEN"]
client = InferenceClient(base_url=endpoint_url, api_key=access_token)

# Conversation sent to the endpoint: a system prompt followed by the user request.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Write a short joke in the style of The Office."},
]

# Generation settings passed through unchanged to the chat-completion call.
generation_options = {"max_tokens": 2560, "temperature": 1.2, "top_p": 0.95}

resp = client.chat.completions.create(messages=conversation, **generation_options)

# Print the message text of the first returned choice.
first_choice = resp.choices[0]
print(first_choice.message.content)