Improve OpenAI API documents (#1827)

Co-authored-by: Chayenne <zhaochenyang@g.ucla.edu>
This commit is contained in:
Chayenne
2024-10-30 00:39:41 -07:00
committed by GitHub
parent 5e00ddebc0
commit 539df95d2c
8 changed files with 857 additions and 207 deletions

View File

@@ -6,7 +6,9 @@
"source": [
"# OpenAI Compatible API\n",
"\n",
"SGLang provides an OpenAI compatible API for smooth transition from OpenAI services.\n",
"SGLang provides an OpenAI compatible API for smooth transition from OpenAI services. Full reference of the API is available at [OpenAI API Reference](https://platform.openai.com/docs/api-reference).\n",
"\n",
"This tutorial aims at these popular APIs:\n",
"\n",
"- `chat/completions`\n",
"- `completions`\n",
@@ -27,42 +29,99 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Server is ready. Proceeding with the next steps.\n"
"/home/chenyang/miniconda3/envs/AlphaMeemory/lib/python3.11/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.\n",
" warnings.warn(\n",
"[2024-10-28 02:02:31] server_args=ServerArgs(model_path='meta-llama/Meta-Llama-3.1-8B-Instruct', tokenizer_path='meta-llama/Meta-Llama-3.1-8B-Instruct', tokenizer_mode='auto', skip_tokenizer_init=False, load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization=None, context_length=None, device='cuda', served_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct', chat_template=None, is_embedding=False, host='0.0.0.0', port=30000, mem_fraction_static=0.88, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=8192, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, tp_size=1, stream_interval=1, random_seed=800169736, constrained_json_whitespace_pattern=None, log_level='info', log_level_http=None, log_requests=False, show_time_cost=False, api_key=None, file_storage_pth='SGLang_storage', enable_cache_report=False, watchdog_timeout=600, dp_size=1, load_balance_method='round_robin', dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', disable_flashinfer=False, disable_flashinfer_sampling=False, disable_radix_cache=False, disable_regex_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_penalizer=False, disable_nan_detection=False, enable_overlap_schedule=False, enable_mixed_chunk=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=160, torchao_config='', enable_p2p_check=False, triton_attention_reduce_in_fp32=False, num_continuous_decode_steps=1)\n",
"/home/chenyang/miniconda3/envs/AlphaMeemory/lib/python3.11/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.\n",
" warnings.warn(\n",
"/home/chenyang/miniconda3/envs/AlphaMeemory/lib/python3.11/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.\n",
" warnings.warn(\n",
"[2024-10-28 02:02:36 TP0] Init torch distributed begin.\n",
"[2024-10-28 02:02:37 TP0] Load weight begin. avail mem=47.27 GB\n",
"[2024-10-28 02:02:37 TP0] Ignore import error when loading sglang.srt.models.mllama. No module named 'transformers.models.mllama'\n",
"INFO 10-28 02:02:38 weight_utils.py:236] Using model weights format ['*.safetensors']\n",
"Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]\n",
"Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.57it/s]\n",
"Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:00<00:00, 2.45it/s]\n",
"Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:00<00:00, 3.53it/s]\n",
"Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:01<00:00, 2.98it/s]\n",
"Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:01<00:00, 2.94it/s]\n",
"\n",
"[2024-10-28 02:02:40 TP0] Load weight end. type=LlamaForCausalLM, dtype=torch.bfloat16, avail mem=32.22 GB\n",
"[2024-10-28 02:02:40 TP0] Memory pool end. avail mem=4.60 GB\n",
"[2024-10-28 02:02:40 TP0] Capture cuda graph begin. This can take up to several minutes.\n",
"[2024-10-28 02:02:48 TP0] max_total_num_tokens=217512, max_prefill_tokens=16384, max_running_requests=2049, context_len=131072\n",
"[2024-10-28 02:02:48] INFO: Started server process [1185529]\n",
"[2024-10-28 02:02:48] INFO: Waiting for application startup.\n",
"[2024-10-28 02:02:48] INFO: Application startup complete.\n",
"[2024-10-28 02:02:48] INFO: Uvicorn running on http://0.0.0.0:30000 (Press CTRL+C to quit)\n",
"[2024-10-28 02:02:48] INFO: 127.0.0.1:47904 - \"GET /v1/models HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Server is ready. Proceeding with the next steps.</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sglang.utils import execute_shell_command, wait_for_server, terminate_process\n",
"\n",
"server_process = execute_shell_command(\n",
" \"\"\"\n",
"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \\\n",
"--port 30000 --host 0.0.0.0 --log-level warning\n",
"\"\"\"\n",
"from sglang.utils import (\n",
" execute_shell_command,\n",
" wait_for_server,\n",
" terminate_process,\n",
" print_highlight,\n",
")\n",
"\n",
"wait_for_server(\"http://localhost:30000\")\n",
"print(\"Server is ready. Proceeding with the next steps.\")"
"server_process = execute_shell_command(\n",
" command=\"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30000 --host 0.0.0.0\"\n",
")\n",
"\n",
"wait_for_server(\"http://localhost:30000\")"
]
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ChatCompletion(id='e854540ec7914b2d8c712f16fd9ed2ca', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Here are 3 countries and their capitals:\\n\\n1. **Country:** Japan\\n**Capital:** Tokyo\\n\\n2. **Country:** Australia\\n**Capital:** Canberra\\n\\n3. **Country:** Brazil\\n**Capital:** Brasília', refusal=None, role='assistant', function_call=None, tool_calls=None), matched_stop=128009)], created=1730012326, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=46, prompt_tokens=49, total_tokens=95, prompt_tokens_details=None))\n"
"[2024-10-28 02:02:49 TP0] Prefill batch. #new-seq: 1, #new-token: 49, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:02:49] INFO: 127.0.0.1:47912 - \"GET /get_model_info HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:02:49 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 1, cache hit rate: 1.79%, token usage: 0.00, #running-req: 1, #queue-req: 0\n",
"[2024-10-28 02:02:49] INFO: 127.0.0.1:47926 - \"POST /generate HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:02:49] The server is fired up and ready to roll!\n",
"[2024-10-28 02:02:50 TP0] Decode batch. #running-req: 1, #token: 89, token usage: 0.00, gen throughput (token/s): 24.12, #queue-req: 0\n",
"[2024-10-28 02:02:50] INFO: 127.0.0.1:47910 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: ChatCompletion(id='692899ebd3ea464dbb456008a7d60bf3', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Here are 3 countries and their capitals:\\n\\n1. **Country:** Japan\\n**Capital:** Tokyo\\n\\n2. **Country:** Australia\\n**Capital:** Canberra\\n\\n3. **Country:** Brazil\\n**Capital:** Brasília', refusal=None, role='assistant', function_call=None, tool_calls=None), matched_stop=128009)], created=1730106170, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=46, prompt_tokens=49, total_tokens=95, prompt_tokens_details=None))</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -84,7 +143,8 @@
" temperature=0,\n",
" max_tokens=64,\n",
")\n",
"print(response)"
"\n",
"print_highlight(f\"Response: {response}\")"
]
},
{
@@ -93,40 +153,35 @@
"source": [
"### Parameters\n",
"\n",
"The chat completions API accepts the following parameters (refer to [OpenAI Chat Completions API](https://platform.openai.com/docs/api-reference/chat/create) for more details):\n",
"\n",
"- `messages`: List of messages in the conversation, each containing `role` and `content`\n",
"- `model`: The model identifier to use for completion\n",
"- `max_tokens`: Maximum number of tokens to generate in the response\n",
"- `temperature`: Controls randomness (0-2). Lower values make output more focused and deterministic\n",
"- `top_p`: Alternative to temperature. Controls diversity via nucleus sampling\n",
"- `n`: Number of chat completion choices to generate\n",
"- `stream`: If true, partial message deltas will be sent as they become available\n",
"- `stop`: Sequences where the API will stop generating further tokens\n",
"- `presence_penalty`: Penalizes new tokens based on their presence in the text so far (-2.0 to 2.0)\n",
"- `frequency_penalty`: Penalizes new tokens based on their frequency in the text so far (-2.0 to 2.0)\n",
"- `logit_bias`: Modify the likelihood of specified tokens appearing in the completion\n",
"- `logprobs`: Include log probabilities of tokens in the response\n",
"- `top_logprobs`: Number of most likely tokens to return probabilities for\n",
"- `seed`: Random seed for deterministic results\n",
"- `response_format`: Specify the format of the response (e.g., JSON)\n",
"- `stream_options`: Additional options for streaming responses\n",
"- `user`: A unique identifier representing your end-user\n",
"The chat completions API accepts OpenAI Chat Completions API's parameters. Refer to [OpenAI Chat Completions API](https://platform.openai.com/docs/api-reference/chat/create) for more details.\n",
"\n",
"Here is an example of a detailed chat completion request:"
]
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ancient Rome's major achievements include:"
"[2024-10-28 02:02:50 TP0] Prefill batch. #new-seq: 1, #new-token: 48, #cached-token: 28, cache hit rate: 21.97%, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:02:50] INFO: 127.0.0.1:47910 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: ChatCompletion(id='bffa083869484c78ab89d334514d5af3', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Ancient Rome's major achievements include:\", refusal=None, role='assistant', function_call=None, tool_calls=None), matched_stop='\\n\\n')], created=1730106170, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=8, prompt_tokens=76, total_tokens=84, prompt_tokens_details=None))</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -152,11 +207,9 @@
" frequency_penalty=0.2, # Mild penalty for more natural language\n",
" n=1, # Single response is usually more stable\n",
" seed=42, # Keep for reproducibility\n",
" stream=True, # Keep streaming for real-time output\n",
")\n",
"\n",
"for chunk in response:\n",
" print(chunk.choices[0].delta.content or \"\", end=\"\")"
"print_highlight(f\"Response: {response}\")"
]
},
{
@@ -167,20 +220,34 @@
"\n",
"### Usage\n",
"\n",
"Completions API is similar to Chat Completions API, but without the `messages` parameter. Refer to [OpenAI Completions API](https://platform.openai.com/docs/api-reference/completions/create) for more details."
"Completions API is similar to Chat Completions API, but without the `messages` parameter."
]
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Completion(id='a6e07198f4b445baa0fb08a2178ceb59', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=None, text=' 1. 2. 3.\\n1. United States - Washington D.C. 2. Japan - Tokyo 3. Australia - Canberra\\nList 3 countries and their capitals. 1. 2. 3.\\n1. China - Beijing 2. Brazil - Bras', matched_stop=None)], created=1730012328, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=64, prompt_tokens=9, total_tokens=73, prompt_tokens_details=None))\n"
"[2024-10-28 02:02:50 TP0] Prefill batch. #new-seq: 1, #new-token: 8, #cached-token: 1, cache hit rate: 21.28%, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:02:51 TP0] Decode batch. #running-req: 1, #token: 37, token usage: 0.00, gen throughput (token/s): 38.07, #queue-req: 0\n",
"[2024-10-28 02:02:52] INFO: 127.0.0.1:47910 - \"POST /v1/completions HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: Completion(id='eb486d0a32fd4384baba923f3bc17e8b', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=None, text=' 1. 2. 3.\\n1. United States - Washington D.C. 2. Japan - Tokyo 3. Australia - Canberra\\nList 3 countries and their capitals. 1. 2. 3.\\n1. China - Beijing 2. Brazil - Bras', matched_stop=None)], created=1730106172, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=64, prompt_tokens=9, total_tokens=73, prompt_tokens_details=None))</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -192,7 +259,8 @@
" n=1,\n",
" stop=None,\n",
")\n",
"print(response)"
"\n",
"print_highlight(f\"Response: {response}\")"
]
},
{
@@ -201,44 +269,39 @@
"source": [
"### Parameters\n",
"\n",
"The completions API accepts the following parameters:\n",
"\n",
"- `model`: The model identifier to use for completion\n",
"- `prompt`: Input text to generate completions for. Can be a string, array of strings, or token arrays\n",
"- `best_of`: Number of completions to generate server-side and return the best one\n",
"- `echo`: If true, the prompt will be included in the response\n",
"- `frequency_penalty`: Penalizes new tokens based on their frequency in the text so far (-2.0 to 2.0)\n",
"- `logit_bias`: Modify the likelihood of specified tokens appearing in the completion\n",
"- `logprobs`: Include log probabilities of tokens in the response\n",
"- `max_tokens`: Maximum number of tokens to generate in the response (default: 16)\n",
"- `n`: Number of completion choices to generate\n",
"- `presence_penalty`: Penalizes new tokens based on their presence in the text so far (-2.0 to 2.0)\n",
"- `seed`: Random seed for deterministic results\n",
"- `stop`: Sequences where the API will stop generating further tokens\n",
"- `stream`: If true, partial completion deltas will be sent as they become available\n",
"- `stream_options`: Additional options for streaming responses\n",
"- `suffix`: Text to append to the completion\n",
"- `temperature`: Controls randomness (0-2). Lower values make output more focused and deterministic\n",
"- `top_p`: Alternative to temperature. Controls diversity via nucleus sampling\n",
"- `user`: A unique identifier representing your end-user\n",
"The completions API accepts OpenAI Completions API's parameters. Refer to [OpenAI Completions API](https://platform.openai.com/docs/api-reference/completions/create) for more details.\n",
"\n",
"Here is an example of a detailed completions request:"
]
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Space explorer, Captain Orion Blackwood, had been traveling through the galaxy for 12 years, searching for a new home for humanity. His ship, the Aurora, had been his home for so long that he barely remembered what it was like to walk on solid ground.\n",
"As he navigated through the dense asteroid field, the ship's computer, S.A.R.A. (Self-Aware Reasoning Algorithm), alerted him to a strange reading on one of the asteroids. Captain Blackwood's curiosity was piqued, and he decided to investigate further.\n",
"\"Captain, I'm detecting unusual energy signatures emanating from the asteroid,\" S.A.R.A. said. \"It's unlike anything I've seen before.\"\n",
"Captain Blackwood's eyes narrowed as"
"[2024-10-28 02:02:52 TP0] Prefill batch. #new-seq: 1, #new-token: 9, #cached-token: 1, cache hit rate: 20.53%, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:02:52 TP0] Decode batch. #running-req: 1, #token: 15, token usage: 0.00, gen throughput (token/s): 40.91, #queue-req: 0\n",
"[2024-10-28 02:02:53 TP0] Decode batch. #running-req: 1, #token: 55, token usage: 0.00, gen throughput (token/s): 42.13, #queue-req: 0\n",
"[2024-10-28 02:02:54 TP0] Decode batch. #running-req: 1, #token: 95, token usage: 0.00, gen throughput (token/s): 42.10, #queue-req: 0\n",
"[2024-10-28 02:02:55 TP0] Decode batch. #running-req: 1, #token: 135, token usage: 0.00, gen throughput (token/s): 41.94, #queue-req: 0\n",
"[2024-10-28 02:02:55] INFO: 127.0.0.1:47910 - \"POST /v1/completions HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: Completion(id='fb23a12a15bc4137815b91d63b6fd976', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=None, text=\" Here is a short story about a space explorer named Astrid.\\nAstrid had always been fascinated by the stars. As a child, she would spend hours gazing up at the night sky, dreaming of what lay beyond our small planet. Now, as a renowned space explorer, she had the chance to explore the cosmos firsthand.\\nAstrid's ship, the Aurora, was equipped with state-of-the-art technology that allowed her to traverse vast distances in a relatively short period of time. She had been traveling for weeks, and finally, she had reached her destination: a distant planet on the edge of the galaxy.\\nAs she entered the planet's atmosphere, Astrid felt a thrill of excitement. She had never seen anything like this before.\", matched_stop=None)], created=1730106175, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=150, prompt_tokens=10, total_tokens=160, prompt_tokens_details=None))</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -253,11 +316,9 @@
" frequency_penalty=0.3, # Reduce repetitive phrases\n",
" n=1, # Generate one completion\n",
" seed=123, # For reproducible results\n",
" stream=True, # Stream the response\n",
")\n",
"\n",
"for chunk in response:\n",
" print(chunk.choices[0].text or \"\", end=\"\")"
"print_highlight(f\"Response: {response}\")"
]
},
{
@@ -279,15 +340,29 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Batch job created with ID: batch_03d7f74f-dffe-4c26-b5e7-bb9fb5cb89ff\n"
"[2024-10-28 02:02:55] INFO: 127.0.0.1:43330 - \"POST /v1/files HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:02:55] INFO: 127.0.0.1:43330 - \"POST /v1/batches HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:02:55 TP0] Prefill batch. #new-seq: 2, #new-token: 30, #cached-token: 50, cache hit rate: 35.06%, token usage: 0.00, #running-req: 0, #queue-req: 0\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job created with ID: batch_56fefd2e-0187-4c14-aa2d-110917723dde</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -337,29 +412,91 @@
" completion_window=\"24h\",\n",
")\n",
"\n",
"print(f\"Batch job created with ID: {batch_response.id}\")"
"print_highlight(f\"Batch job created with ID: {batch_response.id}\")"
]
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:02:56 TP0] Decode batch. #running-req: 2, #token: 82, token usage: 0.00, gen throughput (token/s): 55.10, #queue-req: 0\n",
"Batch job status: validating...trying again in 3 seconds...\n",
"[2024-10-28 02:02:58] INFO: 127.0.0.1:43330 - \"GET /v1/batches/batch_56fefd2e-0187-4c14-aa2d-110917723dde HTTP/1.1\" 200 OK\n",
"Batch job completed successfully!\n",
"Request counts: BatchRequestCounts(completed=2, failed=0, total=2)\n",
"\n",
"Request request-1:\n",
"Response: {'status_code': 200, 'request_id': 'request-1', 'body': {'id': 'request-1', 'object': 'chat.completion', 'created': 1730012333, 'model': 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'choices': {'index': 0, 'message': {'role': 'assistant', 'content': 'Why do programmers prefer dark mode?\\n\\nBecause light attracts bugs.'}, 'logprobs': None, 'finish_reason': 'stop', 'matched_stop': 128009}, 'usage': {'prompt_tokens': 41, 'completion_tokens': 13, 'total_tokens': 54}, 'system_fingerprint': None}}\n",
"\n",
"Request request-2:\n",
"Response: {'status_code': 200, 'request_id': 'request-2', 'body': {'id': 'request-2', 'object': 'chat.completion', 'created': 1730012333, 'model': 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'choices': {'index': 0, 'message': {'role': 'assistant', 'content': '**What is Python?**\\n\\nPython is a high-level, interpreted programming language that is widely used for various purposes, including:\\n\\n* **Web Development**: Building web applications, web services, and web scraping.\\n* **Data Science**: Data analysis'}, 'logprobs': None, 'finish_reason': 'length', 'matched_stop': None}, 'usage': {'prompt_tokens': 39, 'completion_tokens': 50, 'total_tokens': 89}, 'system_fingerprint': None}}\n",
"\n",
"Cleaning up files...\n"
"[2024-10-28 02:02:58] INFO: 127.0.0.1:43330 - \"GET /v1/files/backend_result_file-520da6c8-0cce-4d4c-a943-a86101f5f5b4/content HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Request request-1:</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: {'status_code': 200, 'request_id': 'request-1', 'body': {'id': 'request-1', 'object': 'chat.completion', 'created': 1730106176, 'model': 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'choices': {'index': 0, 'message': {'role': 'assistant', 'content': 'A programmer walks into a library and asks the librarian, \"Do you have any books on Pavlov\\'s dogs and Schrödinger\\'s cat?\"\\n\\nThe librarian replies, \"It rings a bell, but I\\'m not sure if it\\'s here'}, 'logprobs': None, 'finish_reason': 'length', 'matched_stop': None}, 'usage': {'prompt_tokens': 41, 'completion_tokens': 50, 'total_tokens': 91}, 'system_fingerprint': None}}</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Request request-2:</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: {'status_code': 200, 'request_id': 'request-2', 'body': {'id': 'request-2', 'object': 'chat.completion', 'created': 1730106176, 'model': 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'choices': {'index': 0, 'message': {'role': 'assistant', 'content': '**What is Python?**\\n\\nPython is a high-level, interpreted programming language that is widely used for various purposes, including:\\n\\n1. **Web Development**: Building web applications and web services using frameworks like Django and Flask.\\n2. **Data Analysis and'}, 'logprobs': None, 'finish_reason': 'length', 'matched_stop': None}, 'usage': {'prompt_tokens': 39, 'completion_tokens': 50, 'total_tokens': 89}, 'system_fingerprint': None}}</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Cleaning up files...</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:02:58] INFO: 127.0.0.1:43330 - \"DELETE /v1/files/backend_result_file-520da6c8-0cce-4d4c-a943-a86101f5f5b4 HTTP/1.1\" 200 OK\n"
]
}
],
@@ -382,16 +519,16 @@
" ]\n",
"\n",
" for result in results:\n",
" print(f\"\\nRequest {result['custom_id']}:\")\n",
" print(f\"Response: {result['response']}\")\n",
" print_highlight(f\"Request {result['custom_id']}:\")\n",
" print_highlight(f\"Response: {result['response']}\")\n",
"\n",
" print(\"\\nCleaning up files...\")\n",
" print_highlight(\"Cleaning up files...\")\n",
" # Only delete the result file ID since file_response is just content\n",
" client.files.delete(result_file_id)\n",
"else:\n",
" print(f\"Batch job failed with status: {batch_response.status}\")\n",
" print_highlight(f\"Batch job failed with status: {batch_response.status}\")\n",
" if hasattr(batch_response, \"errors\"):\n",
" print(f\"Errors: {batch_response.errors}\")"
" print_highlight(f\"Errors: {batch_response.errors}\")"
]
},
{
@@ -408,66 +545,210 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Created batch job with ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Initial status: validating\n",
"Batch job details (check 1/5):\n",
"ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Status: in_progress\n",
"Created at: 1730012334\n",
"Input file ID: backend_input_file-8203d42a-109c-4573-9663-13b5d9cb6a2b\n",
"Output file ID: None\n",
"Request counts:\n",
"Total: 0\n",
"Completed: 0\n",
"Failed: 0\n",
"Batch job details (check 2/5):\n",
"ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Status: in_progress\n",
"Created at: 1730012334\n",
"Input file ID: backend_input_file-8203d42a-109c-4573-9663-13b5d9cb6a2b\n",
"Output file ID: None\n",
"Request counts:\n",
"Total: 0\n",
"Completed: 0\n",
"Failed: 0\n",
"Batch job details (check 3/5):\n",
"ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Status: in_progress\n",
"Created at: 1730012334\n",
"Input file ID: backend_input_file-8203d42a-109c-4573-9663-13b5d9cb6a2b\n",
"Output file ID: None\n",
"Request counts:\n",
"Total: 0\n",
"Completed: 0\n",
"Failed: 0\n",
"Batch job details (check 4/5):\n",
"ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Status: completed\n",
"Created at: 1730012334\n",
"Input file ID: backend_input_file-8203d42a-109c-4573-9663-13b5d9cb6a2b\n",
"Output file ID: backend_result_file-d32f441d-e737-4da3-b07a-c39349425b3a\n",
"Request counts:\n",
"Total: 100\n",
"Completed: 100\n",
"Failed: 0\n",
"Batch job details (check 5/5):\n",
"ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Status: completed\n",
"Created at: 1730012334\n",
"Input file ID: backend_input_file-8203d42a-109c-4573-9663-13b5d9cb6a2b\n",
"Output file ID: backend_result_file-d32f441d-e737-4da3-b07a-c39349425b3a\n",
"Request counts:\n",
"Total: 100\n",
"Completed: 100\n",
"Failed: 0\n"
"[2024-10-28 02:02:58] INFO: 127.0.0.1:43336 - \"POST /v1/files HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:02:58] INFO: 127.0.0.1:43336 - \"POST /v1/batches HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Created batch job with ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Initial status: validating</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:02:58 TP0] Prefill batch. #new-seq: 17, #new-token: 510, #cached-token: 425, cache hit rate: 43.40%, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:02:58 TP0] Prefill batch. #new-seq: 83, #new-token: 2490, #cached-token: 2075, cache hit rate: 45.04%, token usage: 0.00, #running-req: 17, #queue-req: 0\n",
"[2024-10-28 02:02:59 TP0] Decode batch. #running-req: 100, #token: 3725, token usage: 0.02, gen throughput (token/s): 234.43, #queue-req: 0\n",
"[2024-10-28 02:03:00 TP0] Decode batch. #running-req: 100, #token: 7725, token usage: 0.04, gen throughput (token/s): 3545.41, #queue-req: 0\n",
"[2024-10-28 02:03:01 TP0] Decode batch. #running-req: 100, #token: 11725, token usage: 0.05, gen throughput (token/s): 3448.10, #queue-req: 0\n",
"[2024-10-28 02:03:02 TP0] Decode batch. #running-req: 100, #token: 15725, token usage: 0.07, gen throughput (token/s): 3362.62, #queue-req: 0\n",
"[2024-10-28 02:03:04 TP0] Decode batch. #running-req: 100, #token: 19725, token usage: 0.09, gen throughput (token/s): 3279.58, #queue-req: 0\n",
"[2024-10-28 02:03:05 TP0] Decode batch. #running-req: 100, #token: 23725, token usage: 0.11, gen throughput (token/s): 3200.86, #queue-req: 0\n",
"[2024-10-28 02:03:06 TP0] Decode batch. #running-req: 100, #token: 27725, token usage: 0.13, gen throughput (token/s): 3126.52, #queue-req: 0\n",
"[2024-10-28 02:03:07 TP0] Decode batch. #running-req: 100, #token: 31725, token usage: 0.15, gen throughput (token/s): 3053.16, #queue-req: 0\n",
"[2024-10-28 02:03:08] INFO: 127.0.0.1:41320 - \"GET /v1/batches/batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job details (check 1 / 5) // ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 // Status: in_progress // Created at: 1730106178 // Input file ID: backend_input_file-92cf2cc1-afbd-428f-8c5c-85fabd86cb63 // Output file ID: None</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'><strong>Request counts: Total: 0 // Completed: 0 // Failed: 0</strong></strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:09 TP0] Decode batch. #running-req: 100, #token: 35725, token usage: 0.16, gen throughput (token/s): 2980.26, #queue-req: 0\n",
"[2024-10-28 02:03:10 TP0] Decode batch. #running-req: 100, #token: 39725, token usage: 0.18, gen throughput (token/s): 2919.09, #queue-req: 0\n",
"[2024-10-28 02:03:11] INFO: 127.0.0.1:41320 - \"GET /v1/batches/batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job details (check 2 / 5) // ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 // Status: in_progress // Created at: 1730106178 // Input file ID: backend_input_file-92cf2cc1-afbd-428f-8c5c-85fabd86cb63 // Output file ID: None</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'><strong>Request counts: Total: 0 // Completed: 0 // Failed: 0</strong></strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:11 TP0] Decode batch. #running-req: 100, #token: 43725, token usage: 0.20, gen throughput (token/s): 2854.92, #queue-req: 0\n",
"[2024-10-28 02:03:13 TP0] Decode batch. #running-req: 100, #token: 47725, token usage: 0.22, gen throughput (token/s): 2794.62, #queue-req: 0\n",
"[2024-10-28 02:03:14] INFO: 127.0.0.1:41320 - \"GET /v1/batches/batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job details (check 3 / 5) // ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 // Status: in_progress // Created at: 1730106178 // Input file ID: backend_input_file-92cf2cc1-afbd-428f-8c5c-85fabd86cb63 // Output file ID: None</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'><strong>Request counts: Total: 0 // Completed: 0 // Failed: 0</strong></strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:14 TP0] Decode batch. #running-req: 100, #token: 51725, token usage: 0.24, gen throughput (token/s): 2737.84, #queue-req: 0\n",
"[2024-10-28 02:03:17] INFO: 127.0.0.1:41320 - \"GET /v1/batches/batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job details (check 4 / 5) // ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 // Status: completed // Created at: 1730106178 // Input file ID: backend_input_file-92cf2cc1-afbd-428f-8c5c-85fabd86cb63 // Output file ID: backend_result_file-c10ee9f5-eca8-4357-a922-934543b7f433</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'><strong>Request counts: Total: 100 // Completed: 100 // Failed: 0</strong></strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:20] INFO: 127.0.0.1:41320 - \"GET /v1/batches/batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job details (check 5 / 5) // ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 // Status: completed // Created at: 1730106178 // Input file ID: backend_input_file-92cf2cc1-afbd-428f-8c5c-85fabd86cb63 // Output file ID: backend_result_file-c10ee9f5-eca8-4357-a922-934543b7f433</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'><strong>Request counts: Total: 100 // Completed: 100 // Failed: 0</strong></strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -515,25 +796,21 @@
" completion_window=\"24h\",\n",
")\n",
"\n",
"print(f\"Created batch job with ID: {batch_job.id}\")\n",
"print(f\"Initial status: {batch_job.status}\")\n",
"print_highlight(f\"Created batch job with ID: {batch_job.id}\")\n",
"print_highlight(f\"Initial status: {batch_job.status}\")\n",
"\n",
"time.sleep(10)\n",
"\n",
"max_checks = 5\n",
"for i in range(max_checks):\n",
" batch_details = client.batches.retrieve(batch_id=batch_job.id)\n",
" print(f\"Batch job details (check {i+1}/{max_checks}):\")\n",
" print(f\"ID: {batch_details.id}\")\n",
" print(f\"Status: {batch_details.status}\")\n",
" print(f\"Created at: {batch_details.created_at}\")\n",
" print(f\"Input file ID: {batch_details.input_file_id}\")\n",
" print(f\"Output file ID: {batch_details.output_file_id}\")\n",
"\n",
" print(\"Request counts:\")\n",
" print(f\"Total: {batch_details.request_counts.total}\")\n",
" print(f\"Completed: {batch_details.request_counts.completed}\")\n",
" print(f\"Failed: {batch_details.request_counts.failed}\")\n",
" print_highlight(\n",
" f\"Batch job details (check {i+1} / {max_checks}) // ID: {batch_details.id} // Status: {batch_details.status} // Created at: {batch_details.created_at} // Input file ID: {batch_details.input_file_id} // Output file ID: {batch_details.output_file_id}\"\n",
" )\n",
" print_highlight(\n",
" f\"<strong>Request counts: Total: {batch_details.request_counts.total} // Completed: {batch_details.request_counts.completed} // Failed: {batch_details.request_counts.failed}</strong>\"\n",
" )\n",
"\n",
" time.sleep(3)"
]
@@ -547,20 +824,114 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Created batch job with ID: batch_3d2dd881-ad84-465a-85ee-6d5991794e5e\n",
"Initial status: validating\n",
"Cancellation initiated. Status: cancelling\n",
"Current status: cancelled\n",
"Batch job successfully cancelled\n",
"Successfully cleaned up input file\n"
"[2024-10-28 02:03:23] INFO: 127.0.0.1:47360 - \"POST /v1/files HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:03:23] INFO: 127.0.0.1:47360 - \"POST /v1/batches HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Created batch job with ID: batch_8a409f86-b8c7-4e29-9cc7-187d6d28df62</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Initial status: validating</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:23 TP0] Prefill batch. #new-seq: 44, #new-token: 44, #cached-token: 2376, cache hit rate: 60.81%, token usage: 0.01, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:03:23 TP0] Prefill batch. #new-seq: 328, #new-token: 8192, #cached-token: 9824, cache hit rate: 56.49%, token usage: 0.01, #running-req: 44, #queue-req: 128\n",
"[2024-10-28 02:03:24 TP0] Prefill batch. #new-seq: 129, #new-token: 3864, #cached-token: 3231, cache hit rate: 54.15%, token usage: 0.05, #running-req: 371, #queue-req: 1\n",
"[2024-10-28 02:03:27 TP0] Decode batch. #running-req: 500, #token: 29025, token usage: 0.13, gen throughput (token/s): 1162.55, #queue-req: 0\n",
"[2024-10-28 02:03:31 TP0] Decode batch. #running-req: 500, #token: 49025, token usage: 0.23, gen throughput (token/s): 5606.35, #queue-req: 0\n",
"[2024-10-28 02:03:33] INFO: 127.0.0.1:40110 - \"POST /v1/batches/batch_8a409f86-b8c7-4e29-9cc7-187d6d28df62/cancel HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Cancellation initiated. Status: cancelling</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:36] INFO: 127.0.0.1:40110 - \"GET /v1/batches/batch_8a409f86-b8c7-4e29-9cc7-187d6d28df62 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Current status: cancelled</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job successfully cancelled</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:36] INFO: 127.0.0.1:40110 - \"DELETE /v1/files/backend_input_file-2e9608b6-981b-48ec-8adb-e653ffc69106 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Successfully cleaned up input file</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -608,45 +979,57 @@
" completion_window=\"24h\",\n",
")\n",
"\n",
"print(f\"Created batch job with ID: {batch_job.id}\")\n",
"print(f\"Initial status: {batch_job.status}\")\n",
"print_highlight(f\"Created batch job with ID: {batch_job.id}\")\n",
"print_highlight(f\"Initial status: {batch_job.status}\")\n",
"\n",
"time.sleep(10)\n",
"\n",
"try:\n",
" cancelled_job = client.batches.cancel(batch_id=batch_job.id)\n",
" print(f\"Cancellation initiated. Status: {cancelled_job.status}\")\n",
" print_highlight(f\"Cancellation initiated. Status: {cancelled_job.status}\")\n",
" assert cancelled_job.status == \"cancelling\"\n",
"\n",
" # Monitor the cancellation process\n",
" while cancelled_job.status not in [\"failed\", \"cancelled\"]:\n",
" time.sleep(3)\n",
" cancelled_job = client.batches.retrieve(batch_job.id)\n",
" print(f\"Current status: {cancelled_job.status}\")\n",
" print_highlight(f\"Current status: {cancelled_job.status}\")\n",
"\n",
" # Verify final status\n",
" assert cancelled_job.status == \"cancelled\"\n",
" print(\"Batch job successfully cancelled\")\n",
" print_highlight(\"Batch job successfully cancelled\")\n",
"\n",
"except Exception as e:\n",
" print(f\"Error during cancellation: {e}\")\n",
" print_highlight(f\"Error during cancellation: {e}\")\n",
" raise e\n",
"\n",
"finally:\n",
" try:\n",
" del_response = client.files.delete(uploaded_file.id)\n",
" if del_response.deleted:\n",
" print(\"Successfully cleaned up input file\")\n",
" print_highlight(\"Successfully cleaned up input file\")\n",
" except Exception as e:\n",
" print(f\"Error cleaning up: {e}\")\n",
" print_highlight(f\"Error cleaning up: {e}\")\n",
" raise e"
]
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 10,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:36] INFO: Shutting down\n",
"[2024-10-28 02:03:36] INFO: Waiting for application shutdown.\n",
"[2024-10-28 02:03:36] INFO: Application shutdown complete.\n",
"[2024-10-28 02:03:36] INFO: Finished server process [1185529]\n",
"W1028 02:03:37.084000 140231994889792 torch/_inductor/compile_worker/subproc_pool.py:126] SubprocPool unclean exit\n"
]
}
],
"source": [
"terminate_process(server_process)"
]