@@ -22,12 +22,12 @@
|
||||
"--port 30000 --host 0.0.0.0\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"in your terminal and wait for the server to be ready."
|
||||
"in your terminal and wait for the server to be ready. Once the server is running, you can send test requests using curl or requests. The server implements the [OpenAI-compatible API](https://platform.openai.com/docs/api-reference/chat)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-11-01T02:46:13.611212Z",
|
||||
@@ -59,14 +59,36 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Send a Request\n",
|
||||
"\n",
|
||||
"Once the server is up, you can send test requests using curl. The server implements the [OpenAI-compatible API](https://platform.openai.com/docs/api-reference/)."
|
||||
"## Using cURL\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import subprocess, json\n",
|
||||
"\n",
|
||||
"curl_command = \"\"\"\n",
|
||||
"curl -s http://localhost:30000/v1/chat/completions \\\n",
|
||||
" -d '{\"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\", \"messages\": [{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}, {\"role\": \"user\", \"content\": \"What is a LLM?\"}]}'\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"response = json.loads(subprocess.check_output(curl_command, shell=True))\n",
|
||||
"print_highlight(response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using OpenAI Compatible API w/ Requests"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-11-01T02:46:42.813656Z",
|
||||
@@ -77,30 +99,20 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import subprocess\n",
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"curl_command = \"\"\"\n",
|
||||
"curl http://localhost:30000/v1/chat/completions \\\\\n",
|
||||
" -H \"Content-Type: application/json\" \\\\\n",
|
||||
" -H \"Authorization: Bearer None\" \\\\\n",
|
||||
" -d '{\n",
|
||||
"url = \"http://localhost:30000/v1/chat/completions\"\n",
|
||||
"\n",
|
||||
"data = {\n",
|
||||
" \"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
|
||||
" \"messages\": [\n",
|
||||
" {\n",
|
||||
" \"role\": \"system\",\n",
|
||||
" \"content\": \"You are a helpful assistant.\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"What is an LLM? Tell me in one sentence.\"\n",
|
||||
" }\n",
|
||||
" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
|
||||
" {\"role\": \"user\", \"content\": \"What is a LLM?\"}\n",
|
||||
" ]\n",
|
||||
" }'\n",
|
||||
"\"\"\"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"response = subprocess.check_output(curl_command, shell=True).decode()\n",
|
||||
"\n",
|
||||
"print_highlight(response)"
|
||||
"response = requests.post(url, json=data)\n",
|
||||
"print_highlight(response.json())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -109,12 +121,12 @@
|
||||
"source": [
|
||||
"## Using OpenAI Python Client\n",
|
||||
"\n",
|
||||
"You can use the OpenAI Python API library to send requests."
|
||||
"You can also use the OpenAI Python API library to send requests."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-11-01T02:46:51.439372Z",
|
||||
@@ -138,7 +150,6 @@
|
||||
" temperature=0,\n",
|
||||
" max_tokens=64,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print_highlight(response)"
|
||||
]
|
||||
},
|
||||
@@ -148,13 +159,12 @@
|
||||
"source": [
|
||||
"## Using Native Generation APIs\n",
|
||||
"\n",
|
||||
"You can also use the native `/generate` endpoint. It provides more flexiblity.\n",
|
||||
"An API reference is available at [Sampling Parameters](https://sgl-project.github.io/references/sampling_params.html)."
|
||||
"You can also use the native `/generate` endpoint with requests, which provides more flexiblity. An API reference is available at [Sampling Parameters](https://sgl-project.github.io/references/sampling_params.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -196,6 +206,18 @@
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user