4
.github/workflows/release-docs.yml
vendored
4
.github/workflows/release-docs.yml
vendored
@@ -9,6 +9,10 @@ on:
|
|||||||
- 'python/sglang/version.py'
|
- 'python/sglang/version.py'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: execute-notebook-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
execute-and-deploy:
|
execute-and-deploy:
|
||||||
runs-on: 1-gpu-runner
|
runs-on: 1-gpu-runner
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# Minimal makefile for Sphinx documentation
|
# Minimal makefile for Sphinx documentation
|
||||||
#
|
#
|
||||||
|
|
||||||
# You can set these variables from the command line, and also
|
# You can set these variables from the terminal, and also
|
||||||
# from the environment for the first two.
|
# from the environment for the first two.
|
||||||
SPHINXOPTS ?=
|
SPHINXOPTS ?=
|
||||||
SPHINXBUILD ?= sphinx-build
|
SPHINXBUILD ?= sphinx-build
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Native Server API\n",
|
"# Native API\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Apart from the OpenAI compatible API, the SGLang Runtime also provides its native server API. We introduce these following API:\n",
|
"Apart from the OpenAI compatible API, the SGLang Runtime also provides its native server API. We introduce these following API:\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -254,7 +254,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": 22,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
|||||||
@@ -36,7 +36,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -69,7 +69,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:45:16.624550Z",
|
"iopub.execute_input": "2024-11-01T02:45:16.624550Z",
|
||||||
@@ -110,7 +110,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:45:18.090228Z",
|
"iopub.execute_input": "2024-11-01T02:45:18.090228Z",
|
||||||
@@ -151,12 +151,12 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Streaming mode is also supported"
|
"Streaming mode is also supported."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:45:21.195226Z",
|
"iopub.execute_input": "2024-11-01T02:45:21.195226Z",
|
||||||
@@ -190,7 +190,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:45:21.676813Z",
|
"iopub.execute_input": "2024-11-01T02:45:21.676813Z",
|
||||||
@@ -226,7 +226,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:45:23.186337Z",
|
"iopub.execute_input": "2024-11-01T02:45:23.186337Z",
|
||||||
@@ -272,7 +272,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:45:26.772016Z",
|
"iopub.execute_input": "2024-11-01T02:45:26.772016Z",
|
||||||
@@ -334,7 +334,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:45:26.796422Z",
|
"iopub.execute_input": "2024-11-01T02:45:26.796422Z",
|
||||||
@@ -389,7 +389,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:45:29.812339Z",
|
"iopub.execute_input": "2024-11-01T02:45:29.812339Z",
|
||||||
@@ -472,7 +472,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:45:54.854018Z",
|
"iopub.execute_input": "2024-11-01T02:45:54.854018Z",
|
||||||
@@ -567,7 +567,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 11,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:46:07.896114Z",
|
"iopub.execute_input": "2024-11-01T02:46:07.896114Z",
|
||||||
@@ -587,6 +587,18 @@
|
|||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -36,7 +36,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -63,21 +63,19 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Using cURL\n",
|
"## Using cURL\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Once the server is up, you can send test requests using curl."
|
"Once the server is up, you can send test requests using curl or requests."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import subprocess\n",
|
"import subprocess\n",
|
||||||
"\n",
|
"\n",
|
||||||
"curl_command = \"\"\"\n",
|
"curl_command = \"\"\"\n",
|
||||||
"curl http://localhost:30010/v1/chat/completions \\\n",
|
"curl -s http://localhost:30010/v1/chat/completions \\\n",
|
||||||
" -H \"Content-Type: application/json\" \\\n",
|
|
||||||
" -H \"Authorization: Bearer None\" \\\n",
|
|
||||||
" -d '{\n",
|
" -d '{\n",
|
||||||
" \"model\": \"meta-llama/Llama-3.2-11B-Vision-Instruct\",\n",
|
" \"model\": \"meta-llama/Llama-3.2-11B-Vision-Instruct\",\n",
|
||||||
" \"messages\": [\n",
|
" \"messages\": [\n",
|
||||||
@@ -109,14 +107,57 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Using OpenAI Python Client\n",
|
"## Using OpenAI Compatible API w/ Requests"
|
||||||
"\n",
|
|
||||||
"You can use the OpenAI Python API library to send requests."
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import requests\n",
|
||||||
|
"\n",
|
||||||
|
"url = \"http://localhost:30010/v1/chat/completions\"\n",
|
||||||
|
"\n",
|
||||||
|
"data = {\n",
|
||||||
|
" \"model\": \"meta-llama/Llama-3.2-11B-Vision-Instruct\",\n",
|
||||||
|
" \"messages\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"user\",\n",
|
||||||
|
" \"content\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"type\": \"text\",\n",
|
||||||
|
" \"text\": \"What’s in this image?\"\n",
|
||||||
|
" },\n",
|
||||||
|
" {\n",
|
||||||
|
" \"type\": \"image_url\",\n",
|
||||||
|
" \"image_url\": {\n",
|
||||||
|
" \"url\": \"https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true\"\n",
|
||||||
|
" }\n",
|
||||||
|
" }\n",
|
||||||
|
" ]\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" \"max_tokens\": 300\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"response = requests.post(url, json=data)\n",
|
||||||
|
"print_highlight(response.text)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Using OpenAI Python Client\n",
|
||||||
|
"\n",
|
||||||
|
"Also, you can use the OpenAI Python API library to send requests."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -160,7 +201,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -202,7 +243,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 17,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -233,6 +274,18 @@
|
|||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Embedding Model\n",
|
"# OpenAI APIs - Embedding\n",
|
||||||
"\n",
|
"\n",
|
||||||
"SGLang supports embedding models in the same way as completion models. Here are some example models:\n",
|
"SGLang supports embedding models in the same way as completion models. Here are some example models:\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -62,7 +62,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Use Curl"
|
"## Using cURL"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -83,8 +83,6 @@
|
|||||||
"text = \"Once upon a time\"\n",
|
"text = \"Once upon a time\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"curl_text = f\"\"\"curl -s http://localhost:30010/v1/embeddings \\\n",
|
"curl_text = f\"\"\"curl -s http://localhost:30010/v1/embeddings \\\n",
|
||||||
" -H \"Content-Type: application/json\" \\\n",
|
|
||||||
" -H \"Authorization: Bearer None\" \\\n",
|
|
||||||
" -d '{{\"model\": \"Alibaba-NLP/gte-Qwen2-7B-instruct\", \"input\": \"{text}\"}}'\"\"\"\n",
|
" -d '{{\"model\": \"Alibaba-NLP/gte-Qwen2-7B-instruct\", \"input\": \"{text}\"}}'\"\"\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"text_embedding = json.loads(subprocess.check_output(curl_text, shell=True))[\"data\"][0][\n",
|
"text_embedding = json.loads(subprocess.check_output(curl_text, shell=True))[\"data\"][0][\n",
|
||||||
@@ -98,7 +96,37 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Using OpenAI Compatible API"
|
"## Using OpenAI Compatible API w/ Requests"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import requests\n",
|
||||||
|
"\n",
|
||||||
|
"text = \"Once upon a time\"\n",
|
||||||
|
"\n",
|
||||||
|
"response = requests.post(\n",
|
||||||
|
" \"http://localhost:30010/v1/embeddings\",\n",
|
||||||
|
" json={\n",
|
||||||
|
" \"model\": \"Alibaba-NLP/gte-Qwen2-7B-instruct\",\n",
|
||||||
|
" \"input\": text\n",
|
||||||
|
" }\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"text_embedding = response.json()[\"data\"][0][\"embedding\"]\n",
|
||||||
|
"\n",
|
||||||
|
"print_highlight(f\"Text embedding (first 10): {text_embedding[:10]}\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Using OpenAI Python Client"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -160,8 +188,6 @@
|
|||||||
"input_ids = tokenizer.encode(text)\n",
|
"input_ids = tokenizer.encode(text)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"curl_ids = f\"\"\"curl -s http://localhost:30010/v1/embeddings \\\n",
|
"curl_ids = f\"\"\"curl -s http://localhost:30010/v1/embeddings \\\n",
|
||||||
" -H \"Content-Type: application/json\" \\\n",
|
|
||||||
" -H \"Authorization: Bearer None\" \\\n",
|
|
||||||
" -d '{{\"model\": \"Alibaba-NLP/gte-Qwen2-7B-instruct\", \"input\": {json.dumps(input_ids)}}}'\"\"\"\n",
|
" -d '{{\"model\": \"Alibaba-NLP/gte-Qwen2-7B-instruct\", \"input\": {json.dumps(input_ids)}}}'\"\"\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"input_ids_embedding = json.loads(subprocess.check_output(curl_ids, shell=True))[\"data\"][\n",
|
"input_ids_embedding = json.loads(subprocess.check_output(curl_ids, shell=True))[\"data\"][\n",
|
||||||
@@ -173,7 +199,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:48:01.875204Z",
|
"iopub.execute_input": "2024-11-01T02:48:01.875204Z",
|
||||||
@@ -25,6 +25,7 @@ The core features include:
|
|||||||
|
|
||||||
backend/openai_api_completions.ipynb
|
backend/openai_api_completions.ipynb
|
||||||
backend/openai_api_vision.ipynb
|
backend/openai_api_vision.ipynb
|
||||||
|
backend/openai_embedding_api.ipynb
|
||||||
backend/native_api.ipynb
|
backend/native_api.ipynb
|
||||||
backend/backend.md
|
backend/backend.md
|
||||||
|
|
||||||
|
|||||||
@@ -22,12 +22,12 @@
|
|||||||
"--port 30000 --host 0.0.0.0\n",
|
"--port 30000 --host 0.0.0.0\n",
|
||||||
"```\n",
|
"```\n",
|
||||||
"\n",
|
"\n",
|
||||||
"in your terminal and wait for the server to be ready."
|
"in your terminal and wait for the server to be ready. Once the server is running, you can send test requests using curl or requests. The server implements the [OpenAI-compatible API](https://platform.openai.com/docs/api-reference/chat)."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:46:13.611212Z",
|
"iopub.execute_input": "2024-11-01T02:46:13.611212Z",
|
||||||
@@ -59,14 +59,36 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Send a Request\n",
|
"## Using cURL\n"
|
||||||
"\n",
|
|
||||||
"Once the server is up, you can send test requests using curl. The server implements the [OpenAI-compatible API](https://platform.openai.com/docs/api-reference/)."
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import subprocess, json\n",
|
||||||
|
"\n",
|
||||||
|
"curl_command = \"\"\"\n",
|
||||||
|
"curl -s http://localhost:30000/v1/chat/completions \\\n",
|
||||||
|
" -d '{\"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\", \"messages\": [{\"role\": \"system\", \"content\": \"You are a helpful assistant.\"}, {\"role\": \"user\", \"content\": \"What is a LLM?\"}]}'\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"response = json.loads(subprocess.check_output(curl_command, shell=True))\n",
|
||||||
|
"print_highlight(response)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Using OpenAI Compatible API w/ Requests"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:46:42.813656Z",
|
"iopub.execute_input": "2024-11-01T02:46:42.813656Z",
|
||||||
@@ -77,30 +99,20 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import subprocess\n",
|
"import requests\n",
|
||||||
"\n",
|
"\n",
|
||||||
"curl_command = \"\"\"\n",
|
"url = \"http://localhost:30000/v1/chat/completions\"\n",
|
||||||
"curl http://localhost:30000/v1/chat/completions \\\\\n",
|
"\n",
|
||||||
" -H \"Content-Type: application/json\" \\\\\n",
|
"data = {\n",
|
||||||
" -H \"Authorization: Bearer None\" \\\\\n",
|
|
||||||
" -d '{\n",
|
|
||||||
" \"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
|
" \"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
|
||||||
" \"messages\": [\n",
|
" \"messages\": [\n",
|
||||||
" {\n",
|
" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
|
||||||
" \"role\": \"system\",\n",
|
" {\"role\": \"user\", \"content\": \"What is a LLM?\"}\n",
|
||||||
" \"content\": \"You are a helpful assistant.\"\n",
|
|
||||||
" },\n",
|
|
||||||
" {\n",
|
|
||||||
" \"role\": \"user\",\n",
|
|
||||||
" \"content\": \"What is an LLM? Tell me in one sentence.\"\n",
|
|
||||||
" }\n",
|
|
||||||
" ]\n",
|
" ]\n",
|
||||||
" }'\n",
|
"}\n",
|
||||||
"\"\"\"\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"response = subprocess.check_output(curl_command, shell=True).decode()\n",
|
"response = requests.post(url, json=data)\n",
|
||||||
"\n",
|
"print_highlight(response.json())"
|
||||||
"print_highlight(response)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -109,12 +121,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Using OpenAI Python Client\n",
|
"## Using OpenAI Python Client\n",
|
||||||
"\n",
|
"\n",
|
||||||
"You can use the OpenAI Python API library to send requests."
|
"You can also use the OpenAI Python API library to send requests."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"execution": {
|
"execution": {
|
||||||
"iopub.execute_input": "2024-11-01T02:46:51.439372Z",
|
"iopub.execute_input": "2024-11-01T02:46:51.439372Z",
|
||||||
@@ -138,7 +150,6 @@
|
|||||||
" temperature=0,\n",
|
" temperature=0,\n",
|
||||||
" max_tokens=64,\n",
|
" max_tokens=64,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
|
||||||
"print_highlight(response)"
|
"print_highlight(response)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -148,13 +159,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Using Native Generation APIs\n",
|
"## Using Native Generation APIs\n",
|
||||||
"\n",
|
"\n",
|
||||||
"You can also use the native `/generate` endpoint. It provides more flexiblity.\n",
|
"You can also use the native `/generate` endpoint with requests, which provides more flexiblity. An API reference is available at [Sampling Parameters](https://sgl-project.github.io/references/sampling_params.html)."
|
||||||
"An API reference is available at [Sampling Parameters](https://sgl-project.github.io/references/sampling_params.html)."
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -196,6 +206,18 @@
|
|||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
Reference in New Issue
Block a user