"""Send one chat-completion request to a Hugging Face endpoint and print the reply.

Required env vars:
    export HF_TOKEN="..."
    export HF_ENDPOINT_BASE_URL="https://xxxx.endpoints.huggingface.cloud"
"""

import os

from huggingface_hub import InferenceClient


def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        SystemExit: if the variable is unset or empty, with a message naming
            the variable so the user knows exactly what to export.
    """
    value = os.environ.get(name)
    if not value:
        raise SystemExit(f"Missing required environment variable: {name}")
    return value


def main() -> None:
    """Build the client from the environment, send the request, print the reply."""
    # Read both settings up front so a misconfigured environment fails before
    # any client is constructed or request is sent.
    base_url = _require_env("HF_ENDPOINT_BASE_URL")
    api_key = _require_env("HF_TOKEN")

    client = InferenceClient(
        base_url=base_url,
        api_key=api_key,
    )

    resp = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Write a short joke in the style of The Office."},
        ],
        max_tokens=2560,
        temperature=1.2,
        top_p=0.95,
    )

    # Only the first returned choice is printed.
    print(resp.choices[0].message.content)


if __name__ == "__main__":
    main()