Improve OpenAI API documents (#1827)
Co-authored-by: Chayenne <zhaochenyang@g.ucla.edu>
@@ -6,7 +6,9 @@
"source": [
"# OpenAI Compatible API\n",
"\n",
"SGLang provides an OpenAI compatible API for smooth transition from OpenAI services.\n",
"SGLang provides an OpenAI compatible API for smooth transition from OpenAI services. Full reference of the API is available at [OpenAI API Reference](https://platform.openai.com/docs/api-reference).\n",
"\n",
"This tutorial covers these popular APIs:\n",
"\n",
"- `chat/completions`\n",
"- `completions`\n",
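For readers following along, here is a minimal client setup sketch for the APIs listed above. It assumes the server launched in the next cell is reachable on localhost:30000; the `api_key` value is a placeholder, since the server here is started without one and does not validate it.

from openai import OpenAI

# Minimal sketch: point the official OpenAI SDK at the local SGLang server.
# The api_key string is a placeholder (the server is launched with no key).
client = OpenAI(base_url="http://localhost:30000/v1", api_key="None")

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "List 3 countries and their capitals."}],
    temperature=0,
    max_tokens=64,
)
print(response.choices[0].message.content)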
@@ -27,42 +29,99 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Server is ready. Proceeding with the next steps.\n"
"/home/chenyang/miniconda3/envs/AlphaMeemory/lib/python3.11/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.\n",
" warnings.warn(\n",
"[2024-10-28 02:02:31] server_args=ServerArgs(model_path='meta-llama/Meta-Llama-3.1-8B-Instruct', tokenizer_path='meta-llama/Meta-Llama-3.1-8B-Instruct', tokenizer_mode='auto', skip_tokenizer_init=False, load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization=None, context_length=None, device='cuda', served_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct', chat_template=None, is_embedding=False, host='0.0.0.0', port=30000, mem_fraction_static=0.88, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=8192, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, tp_size=1, stream_interval=1, random_seed=800169736, constrained_json_whitespace_pattern=None, log_level='info', log_level_http=None, log_requests=False, show_time_cost=False, api_key=None, file_storage_pth='SGLang_storage', enable_cache_report=False, watchdog_timeout=600, dp_size=1, load_balance_method='round_robin', dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', disable_flashinfer=False, disable_flashinfer_sampling=False, disable_radix_cache=False, disable_regex_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_penalizer=False, disable_nan_detection=False, enable_overlap_schedule=False, enable_mixed_chunk=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=160, torchao_config='', enable_p2p_check=False, triton_attention_reduce_in_fp32=False, num_continuous_decode_steps=1)\n",
"/home/chenyang/miniconda3/envs/AlphaMeemory/lib/python3.11/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.\n",
" warnings.warn(\n",
"/home/chenyang/miniconda3/envs/AlphaMeemory/lib/python3.11/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.\n",
" warnings.warn(\n",
"[2024-10-28 02:02:36 TP0] Init torch distributed begin.\n",
"[2024-10-28 02:02:37 TP0] Load weight begin. avail mem=47.27 GB\n",
"[2024-10-28 02:02:37 TP0] Ignore import error when loading sglang.srt.models.mllama. No module named 'transformers.models.mllama'\n",
"INFO 10-28 02:02:38 weight_utils.py:236] Using model weights format ['*.safetensors']\n",
"Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]\n",
"Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:01, 2.57it/s]\n",
"Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:00<00:00, 2.45it/s]\n",
"Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:00<00:00, 3.53it/s]\n",
"Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:01<00:00, 2.98it/s]\n",
"Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:01<00:00, 2.94it/s]\n",
"\n",
"[2024-10-28 02:02:40 TP0] Load weight end. type=LlamaForCausalLM, dtype=torch.bfloat16, avail mem=32.22 GB\n",
"[2024-10-28 02:02:40 TP0] Memory pool end. avail mem=4.60 GB\n",
"[2024-10-28 02:02:40 TP0] Capture cuda graph begin. This can take up to several minutes.\n",
"[2024-10-28 02:02:48 TP0] max_total_num_tokens=217512, max_prefill_tokens=16384, max_running_requests=2049, context_len=131072\n",
"[2024-10-28 02:02:48] INFO: Started server process [1185529]\n",
"[2024-10-28 02:02:48] INFO: Waiting for application startup.\n",
"[2024-10-28 02:02:48] INFO: Application startup complete.\n",
"[2024-10-28 02:02:48] INFO: Uvicorn running on http://0.0.0.0:30000 (Press CTRL+C to quit)\n",
"[2024-10-28 02:02:48] INFO: 127.0.0.1:47904 - \"GET /v1/models HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Server is ready. Proceeding with the next steps.</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sglang.utils import execute_shell_command, wait_for_server, terminate_process\n",
"\n",
"server_process = execute_shell_command(\n",
" \"\"\"\n",
"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct \\\n",
"--port 30000 --host 0.0.0.0 --log-level warning\n",
"\"\"\"\n",
"from sglang.utils import (\n",
" execute_shell_command,\n",
" wait_for_server,\n",
" terminate_process,\n",
" print_highlight,\n",
")\n",
"\n",
"wait_for_server(\"http://localhost:30000\")\n",
"print(\"Server is ready. Proceeding with the next steps.\")"
"server_process = execute_shell_command(\n",
" command=\"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30000 --host 0.0.0.0\"\n",
")\n",
"\n",
"wait_for_server(\"http://localhost:30000\")"
]
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ChatCompletion(id='e854540ec7914b2d8c712f16fd9ed2ca', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Here are 3 countries and their capitals:\\n\\n1. **Country:** Japan\\n**Capital:** Tokyo\\n\\n2. **Country:** Australia\\n**Capital:** Canberra\\n\\n3. **Country:** Brazil\\n**Capital:** Brasília', refusal=None, role='assistant', function_call=None, tool_calls=None), matched_stop=128009)], created=1730012326, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=46, prompt_tokens=49, total_tokens=95, prompt_tokens_details=None))\n"
"[2024-10-28 02:02:49 TP0] Prefill batch. #new-seq: 1, #new-token: 49, #cached-token: 0, cache hit rate: 0.00%, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:02:49] INFO: 127.0.0.1:47912 - \"GET /get_model_info HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:02:49 TP0] Prefill batch. #new-seq: 1, #new-token: 6, #cached-token: 1, cache hit rate: 1.79%, token usage: 0.00, #running-req: 1, #queue-req: 0\n",
"[2024-10-28 02:02:49] INFO: 127.0.0.1:47926 - \"POST /generate HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:02:49] The server is fired up and ready to roll!\n",
"[2024-10-28 02:02:50 TP0] Decode batch. #running-req: 1, #token: 89, token usage: 0.00, gen throughput (token/s): 24.12, #queue-req: 0\n",
"[2024-10-28 02:02:50] INFO: 127.0.0.1:47910 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: ChatCompletion(id='692899ebd3ea464dbb456008a7d60bf3', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Here are 3 countries and their capitals:\\n\\n1. **Country:** Japan\\n**Capital:** Tokyo\\n\\n2. **Country:** Australia\\n**Capital:** Canberra\\n\\n3. **Country:** Brazil\\n**Capital:** Brasília', refusal=None, role='assistant', function_call=None, tool_calls=None), matched_stop=128009)], created=1730106170, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=46, prompt_tokens=49, total_tokens=95, prompt_tokens_details=None))</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -84,7 +143,8 @@
" temperature=0,\n",
" max_tokens=64,\n",
")\n",
"print(response)"
"\n",
"print_highlight(f\"Response: {response}\")"
]
},
{
@@ -93,40 +153,35 @@
"source": [
"### Parameters\n",
"\n",
"The chat completions API accepts the following parameters (refer to [OpenAI Chat Completions API](https://platform.openai.com/docs/api-reference/chat/create) for more details):\n",
"\n",
"- `messages`: List of messages in the conversation, each containing `role` and `content`\n",
"- `model`: The model identifier to use for completion\n",
"- `max_tokens`: Maximum number of tokens to generate in the response\n",
"- `temperature`: Controls randomness (0-2). Lower values make output more focused and deterministic\n",
"- `top_p`: Alternative to temperature. Controls diversity via nucleus sampling\n",
"- `n`: Number of chat completion choices to generate\n",
"- `stream`: If true, partial message deltas will be sent as they become available\n",
"- `stop`: Sequences where the API will stop generating further tokens\n",
"- `presence_penalty`: Penalizes new tokens based on their presence in the text so far (-2.0 to 2.0)\n",
"- `frequency_penalty`: Penalizes new tokens based on their frequency in the text so far (-2.0 to 2.0)\n",
"- `logit_bias`: Modify the likelihood of specified tokens appearing in the completion\n",
"- `logprobs`: Include log probabilities of tokens in the response\n",
"- `top_logprobs`: Number of most likely tokens to return probabilities for\n",
"- `seed`: Random seed for deterministic results\n",
"- `response_format`: Specify the format of the response (e.g., JSON)\n",
"- `stream_options`: Additional options for streaming responses\n",
"- `user`: A unique identifier representing your end-user\n",
"The chat completions API accepts OpenAI Chat Completions API's parameters. Refer to [OpenAI Chat Completions API](https://platform.openai.com/docs/api-reference/chat/create) for more details.\n",
"\n",
"Here is an example of a detailed chat completion request:"
]
},
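A sketch of such a detailed request, reusing the `client` from the setup sketch near the top; the sampling values are illustrative examples, not recommendations, and the notebook's own cell (shown next in the diff) is the authoritative version.

# Illustrative detailed chat completion request (example values only).
response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    messages=[
        {"role": "system", "content": "You are a knowledgeable historian."},
        {"role": "user", "content": "Tell me about ancient Rome"},
    ],
    temperature=0.3,
    max_tokens=128,
    top_p=0.95,
    presence_penalty=0.2,
    frequency_penalty=0.2,
    n=1,
    seed=42,
)
print(response.choices[0].message.content)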
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ancient Rome's major achievements include:"
"[2024-10-28 02:02:50 TP0] Prefill batch. #new-seq: 1, #new-token: 48, #cached-token: 28, cache hit rate: 21.97%, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:02:50] INFO: 127.0.0.1:47910 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: ChatCompletion(id='bffa083869484c78ab89d334514d5af3', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Ancient Rome's major achievements include:\", refusal=None, role='assistant', function_call=None, tool_calls=None), matched_stop='\\n\\n')], created=1730106170, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=8, prompt_tokens=76, total_tokens=84, prompt_tokens_details=None))</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -152,11 +207,9 @@
" frequency_penalty=0.2, # Mild penalty for more natural language\n",
" n=1, # Single response is usually more stable\n",
" seed=42, # Keep for reproducibility\n",
" stream=True, # Keep streaming for real-time output\n",
")\n",
"\n",
"for chunk in response:\n",
" print(chunk.choices[0].delta.content or \"\", end=\"\")"
"print_highlight(f\"Response: {response}\")"
]
},
{
@@ -167,20 +220,34 @@
"\n",
"### Usage\n",
"\n",
"Completions API is similar to Chat Completions API, but without the `messages` parameter. Refer to [OpenAI Completions API](https://platform.openai.com/docs/api-reference/completions/create) for more details."
"Completions API is similar to Chat Completions API, but without the `messages` parameter."
]
},
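A minimal text-completion sketch with the same `client` as above; the prompt mirrors the notebook's example, and the values are illustrative.

# Minimal sketch of a /v1/completions request (prompt instead of messages).
response = client.completions.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    prompt="List 3 countries and their capitals.",
    temperature=0,
    max_tokens=64,
    n=1,
    stop=None,
)
print(response.choices[0].text)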
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Completion(id='a6e07198f4b445baa0fb08a2178ceb59', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=None, text=' 1. 2. 3.\\n1. United States - Washington D.C. 2. Japan - Tokyo 3. Australia - Canberra\\nList 3 countries and their capitals. 1. 2. 3.\\n1. China - Beijing 2. Brazil - Bras', matched_stop=None)], created=1730012328, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=64, prompt_tokens=9, total_tokens=73, prompt_tokens_details=None))\n"
"[2024-10-28 02:02:50 TP0] Prefill batch. #new-seq: 1, #new-token: 8, #cached-token: 1, cache hit rate: 21.28%, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:02:51 TP0] Decode batch. #running-req: 1, #token: 37, token usage: 0.00, gen throughput (token/s): 38.07, #queue-req: 0\n",
"[2024-10-28 02:02:52] INFO: 127.0.0.1:47910 - \"POST /v1/completions HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: Completion(id='eb486d0a32fd4384baba923f3bc17e8b', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=None, text=' 1. 2. 3.\\n1. United States - Washington D.C. 2. Japan - Tokyo 3. Australia - Canberra\\nList 3 countries and their capitals. 1. 2. 3.\\n1. China - Beijing 2. Brazil - Bras', matched_stop=None)], created=1730106172, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=64, prompt_tokens=9, total_tokens=73, prompt_tokens_details=None))</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -192,7 +259,8 @@
" n=1,\n",
" stop=None,\n",
")\n",
"print(response)"
"\n",
"print_highlight(f\"Response: {response}\")"
]
},
{
@@ -201,44 +269,39 @@
"source": [
"### Parameters\n",
"\n",
"The completions API accepts the following parameters:\n",
"\n",
"- `model`: The model identifier to use for completion\n",
"- `prompt`: Input text to generate completions for. Can be a string, array of strings, or token arrays\n",
"- `best_of`: Number of completions to generate server-side and return the best one\n",
"- `echo`: If true, the prompt will be included in the response\n",
"- `frequency_penalty`: Penalizes new tokens based on their frequency in the text so far (-2.0 to 2.0)\n",
"- `logit_bias`: Modify the likelihood of specified tokens appearing in the completion\n",
"- `logprobs`: Include log probabilities of tokens in the response\n",
"- `max_tokens`: Maximum number of tokens to generate in the response (default: 16)\n",
"- `n`: Number of completion choices to generate\n",
"- `presence_penalty`: Penalizes new tokens based on their presence in the text so far (-2.0 to 2.0)\n",
"- `seed`: Random seed for deterministic results\n",
"- `stop`: Sequences where the API will stop generating further tokens\n",
"- `stream`: If true, partial completion deltas will be sent as they become available\n",
"- `stream_options`: Additional options for streaming responses\n",
"- `suffix`: Text to append to the completion\n",
"- `temperature`: Controls randomness (0-2). Lower values make output more focused and deterministic\n",
"- `top_p`: Alternative to temperature. Controls diversity via nucleus sampling\n",
"- `user`: A unique identifier representing your end-user\n",
"The completions API accepts OpenAI Completions API's parameters. Refer to [OpenAI Completions API](https://platform.openai.com/docs/api-reference/completions/create) for more details.\n",
"\n",
"Here is an example of a detailed completions request:"
]
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Space explorer, Captain Orion Blackwood, had been traveling through the galaxy for 12 years, searching for a new home for humanity. His ship, the Aurora, had been his home for so long that he barely remembered what it was like to walk on solid ground.\n",
"As he navigated through the dense asteroid field, the ship's computer, S.A.R.A. (Self-Aware Reasoning Algorithm), alerted him to a strange reading on one of the asteroids. Captain Blackwood's curiosity was piqued, and he decided to investigate further.\n",
"\"Captain, I'm detecting unusual energy signatures emanating from the asteroid,\" S.A.R.A. said. \"It's unlike anything I've seen before.\"\n",
"Captain Blackwood's eyes narrowed as"
"[2024-10-28 02:02:52 TP0] Prefill batch. #new-seq: 1, #new-token: 9, #cached-token: 1, cache hit rate: 20.53%, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:02:52 TP0] Decode batch. #running-req: 1, #token: 15, token usage: 0.00, gen throughput (token/s): 40.91, #queue-req: 0\n",
"[2024-10-28 02:02:53 TP0] Decode batch. #running-req: 1, #token: 55, token usage: 0.00, gen throughput (token/s): 42.13, #queue-req: 0\n",
"[2024-10-28 02:02:54 TP0] Decode batch. #running-req: 1, #token: 95, token usage: 0.00, gen throughput (token/s): 42.10, #queue-req: 0\n",
"[2024-10-28 02:02:55 TP0] Decode batch. #running-req: 1, #token: 135, token usage: 0.00, gen throughput (token/s): 41.94, #queue-req: 0\n",
"[2024-10-28 02:02:55] INFO: 127.0.0.1:47910 - \"POST /v1/completions HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: Completion(id='fb23a12a15bc4137815b91d63b6fd976', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=None, text=\" Here is a short story about a space explorer named Astrid.\\nAstrid had always been fascinated by the stars. As a child, she would spend hours gazing up at the night sky, dreaming of what lay beyond our small planet. Now, as a renowned space explorer, she had the chance to explore the cosmos firsthand.\\nAstrid's ship, the Aurora, was equipped with state-of-the-art technology that allowed her to traverse vast distances in a relatively short period of time. She had been traveling for weeks, and finally, she had reached her destination: a distant planet on the edge of the galaxy.\\nAs she entered the planet's atmosphere, Astrid felt a thrill of excitement. She had never seen anything like this before.\", matched_stop=None)], created=1730106175, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=150, prompt_tokens=10, total_tokens=160, prompt_tokens_details=None))</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -253,11 +316,9 @@
" frequency_penalty=0.3, # Reduce repetitive phrases\n",
" n=1, # Generate one completion\n",
" seed=123, # For reproducible results\n",
" stream=True, # Stream the response\n",
")\n",
"\n",
"for chunk in response:\n",
" print(chunk.choices[0].text or \"\", end=\"\")"
"print_highlight(f\"Response: {response}\")"
]
},
{
@@ -279,15 +340,29 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Batch job created with ID: batch_03d7f74f-dffe-4c26-b5e7-bb9fb5cb89ff\n"
"[2024-10-28 02:02:55] INFO: 127.0.0.1:43330 - \"POST /v1/files HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:02:55] INFO: 127.0.0.1:43330 - \"POST /v1/batches HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:02:55 TP0] Prefill batch. #new-seq: 2, #new-token: 30, #cached-token: 50, cache hit rate: 35.06%, token usage: 0.00, #running-req: 0, #queue-req: 0\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job created with ID: batch_56fefd2e-0187-4c14-aa2d-110917723dde</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -337,29 +412,91 @@
" completion_window=\"24h\",\n",
")\n",
"\n",
"print(f\"Batch job created with ID: {batch_response.id}\")"
"print_highlight(f\"Batch job created with ID: {batch_response.id}\")"
]
},
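The file uploaded before creating the batch is JSON Lines: one request object per line, following the OpenAI batch request schema (custom_id / method / url / body). A sketch of writing such a file (the file name is illustrative; the two requests mirror the outputs shown in this notebook's batch run):

import json

# Hypothetical batch input, one JSON object per line (JSONL).
requests = [
    {
        "custom_id": "request-1",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
            "messages": [{"role": "user", "content": "Tell me a joke about programming"}],
            "max_tokens": 50,
        },
    },
    {
        "custom_id": "request-2",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
            "messages": [{"role": "user", "content": "What is Python?"}],
            "max_tokens": 50,
        },
    },
]

# Write the JSONL file, then upload it via client.files.create(..., purpose="batch").
with open("batch_requests.jsonl", "w") as f:
    for req in requests:
        f.write(json.dumps(req) + "\n")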
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:02:56 TP0] Decode batch. #running-req: 2, #token: 82, token usage: 0.00, gen throughput (token/s): 55.10, #queue-req: 0\n",
"Batch job status: validating...trying again in 3 seconds...\n",
"[2024-10-28 02:02:58] INFO: 127.0.0.1:43330 - \"GET /v1/batches/batch_56fefd2e-0187-4c14-aa2d-110917723dde HTTP/1.1\" 200 OK\n",
"Batch job completed successfully!\n",
"Request counts: BatchRequestCounts(completed=2, failed=0, total=2)\n",
"\n",
"Request request-1:\n",
"Response: {'status_code': 200, 'request_id': 'request-1', 'body': {'id': 'request-1', 'object': 'chat.completion', 'created': 1730012333, 'model': 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'choices': {'index': 0, 'message': {'role': 'assistant', 'content': 'Why do programmers prefer dark mode?\\n\\nBecause light attracts bugs.'}, 'logprobs': None, 'finish_reason': 'stop', 'matched_stop': 128009}, 'usage': {'prompt_tokens': 41, 'completion_tokens': 13, 'total_tokens': 54}, 'system_fingerprint': None}}\n",
"\n",
"Request request-2:\n",
"Response: {'status_code': 200, 'request_id': 'request-2', 'body': {'id': 'request-2', 'object': 'chat.completion', 'created': 1730012333, 'model': 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'choices': {'index': 0, 'message': {'role': 'assistant', 'content': '**What is Python?**\\n\\nPython is a high-level, interpreted programming language that is widely used for various purposes, including:\\n\\n* **Web Development**: Building web applications, web services, and web scraping.\\n* **Data Science**: Data analysis'}, 'logprobs': None, 'finish_reason': 'length', 'matched_stop': None}, 'usage': {'prompt_tokens': 39, 'completion_tokens': 50, 'total_tokens': 89}, 'system_fingerprint': None}}\n",
"\n",
"Cleaning up files...\n"
"[2024-10-28 02:02:58] INFO: 127.0.0.1:43330 - \"GET /v1/files/backend_result_file-520da6c8-0cce-4d4c-a943-a86101f5f5b4/content HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Request request-1:</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: {'status_code': 200, 'request_id': 'request-1', 'body': {'id': 'request-1', 'object': 'chat.completion', 'created': 1730106176, 'model': 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'choices': {'index': 0, 'message': {'role': 'assistant', 'content': 'A programmer walks into a library and asks the librarian, \"Do you have any books on Pavlov\\'s dogs and Schrödinger\\'s cat?\"\\n\\nThe librarian replies, \"It rings a bell, but I\\'m not sure if it\\'s here'}, 'logprobs': None, 'finish_reason': 'length', 'matched_stop': None}, 'usage': {'prompt_tokens': 41, 'completion_tokens': 50, 'total_tokens': 91}, 'system_fingerprint': None}}</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Request request-2:</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Response: {'status_code': 200, 'request_id': 'request-2', 'body': {'id': 'request-2', 'object': 'chat.completion', 'created': 1730106176, 'model': 'meta-llama/Meta-Llama-3.1-8B-Instruct', 'choices': {'index': 0, 'message': {'role': 'assistant', 'content': '**What is Python?**\\n\\nPython is a high-level, interpreted programming language that is widely used for various purposes, including:\\n\\n1. **Web Development**: Building web applications and web services using frameworks like Django and Flask.\\n2. **Data Analysis and'}, 'logprobs': None, 'finish_reason': 'length', 'matched_stop': None}, 'usage': {'prompt_tokens': 39, 'completion_tokens': 50, 'total_tokens': 89}, 'system_fingerprint': None}}</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Cleaning up files...</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:02:58] INFO: 127.0.0.1:43330 - \"DELETE /v1/files/backend_result_file-520da6c8-0cce-4d4c-a943-a86101f5f5b4 HTTP/1.1\" 200 OK\n"
]
}
],
@@ -382,16 +519,16 @@
" ]\n",
"\n",
" for result in results:\n",
" print(f\"\\nRequest {result['custom_id']}:\")\n",
" print(f\"Response: {result['response']}\")\n",
" print_highlight(f\"Request {result['custom_id']}:\")\n",
" print_highlight(f\"Response: {result['response']}\")\n",
"\n",
" print(\"\\nCleaning up files...\")\n",
" print_highlight(\"Cleaning up files...\")\n",
" # Only delete the result file ID since file_response is just content\n",
" client.files.delete(result_file_id)\n",
"else:\n",
" print(f\"Batch job failed with status: {batch_response.status}\")\n",
" print_highlight(f\"Batch job failed with status: {batch_response.status}\")\n",
" if hasattr(batch_response, \"errors\"):\n",
" print(f\"Errors: {batch_response.errors}\")"
" print_highlight(f\"Errors: {batch_response.errors}\")"
]
},
{
@@ -408,66 +545,210 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Created batch job with ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Initial status: validating\n",
"Batch job details (check 1/5):\n",
"ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Status: in_progress\n",
"Created at: 1730012334\n",
"Input file ID: backend_input_file-8203d42a-109c-4573-9663-13b5d9cb6a2b\n",
"Output file ID: None\n",
"Request counts:\n",
"Total: 0\n",
"Completed: 0\n",
"Failed: 0\n",
"Batch job details (check 2/5):\n",
"ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Status: in_progress\n",
"Created at: 1730012334\n",
"Input file ID: backend_input_file-8203d42a-109c-4573-9663-13b5d9cb6a2b\n",
"Output file ID: None\n",
"Request counts:\n",
"Total: 0\n",
"Completed: 0\n",
"Failed: 0\n",
"Batch job details (check 3/5):\n",
"ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Status: in_progress\n",
"Created at: 1730012334\n",
"Input file ID: backend_input_file-8203d42a-109c-4573-9663-13b5d9cb6a2b\n",
"Output file ID: None\n",
"Request counts:\n",
"Total: 0\n",
"Completed: 0\n",
"Failed: 0\n",
"Batch job details (check 4/5):\n",
"ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Status: completed\n",
"Created at: 1730012334\n",
"Input file ID: backend_input_file-8203d42a-109c-4573-9663-13b5d9cb6a2b\n",
"Output file ID: backend_result_file-d32f441d-e737-4da3-b07a-c39349425b3a\n",
"Request counts:\n",
"Total: 100\n",
"Completed: 100\n",
"Failed: 0\n",
"Batch job details (check 5/5):\n",
"ID: batch_6b9625ac-9ebc-4c4f-bfd5-f84f88b0100d\n",
"Status: completed\n",
"Created at: 1730012334\n",
"Input file ID: backend_input_file-8203d42a-109c-4573-9663-13b5d9cb6a2b\n",
"Output file ID: backend_result_file-d32f441d-e737-4da3-b07a-c39349425b3a\n",
"Request counts:\n",
"Total: 100\n",
"Completed: 100\n",
"Failed: 0\n"
"[2024-10-28 02:02:58] INFO: 127.0.0.1:43336 - \"POST /v1/files HTTP/1.1\" 200 OK\n",
|
||||
"[2024-10-28 02:02:58] INFO: 127.0.0.1:43336 - \"POST /v1/batches HTTP/1.1\" 200 OK\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<strong style='color: #00008B;'>Created batch job with ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5</strong>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<strong style='color: #00008B;'>Initial status: validating</strong>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[2024-10-28 02:02:58 TP0] Prefill batch. #new-seq: 17, #new-token: 510, #cached-token: 425, cache hit rate: 43.40%, token usage: 0.00, #running-req: 0, #queue-req: 0\n",
|
||||
"[2024-10-28 02:02:58 TP0] Prefill batch. #new-seq: 83, #new-token: 2490, #cached-token: 2075, cache hit rate: 45.04%, token usage: 0.00, #running-req: 17, #queue-req: 0\n",
|
||||
"[2024-10-28 02:02:59 TP0] Decode batch. #running-req: 100, #token: 3725, token usage: 0.02, gen throughput (token/s): 234.43, #queue-req: 0\n",
|
||||
"[2024-10-28 02:03:00 TP0] Decode batch. #running-req: 100, #token: 7725, token usage: 0.04, gen throughput (token/s): 3545.41, #queue-req: 0\n",
|
||||
"[2024-10-28 02:03:01 TP0] Decode batch. #running-req: 100, #token: 11725, token usage: 0.05, gen throughput (token/s): 3448.10, #queue-req: 0\n",
|
||||
"[2024-10-28 02:03:02 TP0] Decode batch. #running-req: 100, #token: 15725, token usage: 0.07, gen throughput (token/s): 3362.62, #queue-req: 0\n",
|
||||
"[2024-10-28 02:03:04 TP0] Decode batch. #running-req: 100, #token: 19725, token usage: 0.09, gen throughput (token/s): 3279.58, #queue-req: 0\n",
|
||||
"[2024-10-28 02:03:05 TP0] Decode batch. #running-req: 100, #token: 23725, token usage: 0.11, gen throughput (token/s): 3200.86, #queue-req: 0\n",
|
||||
"[2024-10-28 02:03:06 TP0] Decode batch. #running-req: 100, #token: 27725, token usage: 0.13, gen throughput (token/s): 3126.52, #queue-req: 0\n",
|
||||
"[2024-10-28 02:03:07 TP0] Decode batch. #running-req: 100, #token: 31725, token usage: 0.15, gen throughput (token/s): 3053.16, #queue-req: 0\n",
|
||||
"[2024-10-28 02:03:08] INFO: 127.0.0.1:41320 - \"GET /v1/batches/batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 HTTP/1.1\" 200 OK\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<strong style='color: #00008B;'>Batch job details (check 1 / 5) // ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 // Status: in_progress // Created at: 1730106178 // Input file ID: backend_input_file-92cf2cc1-afbd-428f-8c5c-85fabd86cb63 // Output file ID: None</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'><strong>Request counts: Total: 0 // Completed: 0 // Failed: 0</strong></strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:09 TP0] Decode batch. #running-req: 100, #token: 35725, token usage: 0.16, gen throughput (token/s): 2980.26, #queue-req: 0\n",
"[2024-10-28 02:03:10 TP0] Decode batch. #running-req: 100, #token: 39725, token usage: 0.18, gen throughput (token/s): 2919.09, #queue-req: 0\n",
"[2024-10-28 02:03:11] INFO: 127.0.0.1:41320 - \"GET /v1/batches/batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job details (check 2 / 5) // ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 // Status: in_progress // Created at: 1730106178 // Input file ID: backend_input_file-92cf2cc1-afbd-428f-8c5c-85fabd86cb63 // Output file ID: None</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'><strong>Request counts: Total: 0 // Completed: 0 // Failed: 0</strong></strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:11 TP0] Decode batch. #running-req: 100, #token: 43725, token usage: 0.20, gen throughput (token/s): 2854.92, #queue-req: 0\n",
"[2024-10-28 02:03:13 TP0] Decode batch. #running-req: 100, #token: 47725, token usage: 0.22, gen throughput (token/s): 2794.62, #queue-req: 0\n",
"[2024-10-28 02:03:14] INFO: 127.0.0.1:41320 - \"GET /v1/batches/batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job details (check 3 / 5) // ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 // Status: in_progress // Created at: 1730106178 // Input file ID: backend_input_file-92cf2cc1-afbd-428f-8c5c-85fabd86cb63 // Output file ID: None</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'><strong>Request counts: Total: 0 // Completed: 0 // Failed: 0</strong></strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:14 TP0] Decode batch. #running-req: 100, #token: 51725, token usage: 0.24, gen throughput (token/s): 2737.84, #queue-req: 0\n",
"[2024-10-28 02:03:17] INFO: 127.0.0.1:41320 - \"GET /v1/batches/batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job details (check 4 / 5) // ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 // Status: completed // Created at: 1730106178 // Input file ID: backend_input_file-92cf2cc1-afbd-428f-8c5c-85fabd86cb63 // Output file ID: backend_result_file-c10ee9f5-eca8-4357-a922-934543b7f433</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'><strong>Request counts: Total: 100 // Completed: 100 // Failed: 0</strong></strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:20] INFO: 127.0.0.1:41320 - \"GET /v1/batches/batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job details (check 5 / 5) // ID: batch_67da0e16-e7b2-4a75-9f7a-58c033e739e5 // Status: completed // Created at: 1730106178 // Input file ID: backend_input_file-92cf2cc1-afbd-428f-8c5c-85fabd86cb63 // Output file ID: backend_result_file-c10ee9f5-eca8-4357-a922-934543b7f433</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'><strong>Request counts: Total: 100 // Completed: 100 // Failed: 0</strong></strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -515,25 +796,21 @@
" completion_window=\"24h\",\n",
")\n",
"\n",
"print(f\"Created batch job with ID: {batch_job.id}\")\n",
"print(f\"Initial status: {batch_job.status}\")\n",
"print_highlight(f\"Created batch job with ID: {batch_job.id}\")\n",
"print_highlight(f\"Initial status: {batch_job.status}\")\n",
"\n",
"time.sleep(10)\n",
"\n",
"max_checks = 5\n",
"for i in range(max_checks):\n",
" batch_details = client.batches.retrieve(batch_id=batch_job.id)\n",
" print(f\"Batch job details (check {i+1}/{max_checks}):\")\n",
" print(f\"ID: {batch_details.id}\")\n",
" print(f\"Status: {batch_details.status}\")\n",
" print(f\"Created at: {batch_details.created_at}\")\n",
" print(f\"Input file ID: {batch_details.input_file_id}\")\n",
" print(f\"Output file ID: {batch_details.output_file_id}\")\n",
"\n",
" print(\"Request counts:\")\n",
" print(f\"Total: {batch_details.request_counts.total}\")\n",
" print(f\"Completed: {batch_details.request_counts.completed}\")\n",
" print(f\"Failed: {batch_details.request_counts.failed}\")\n",
" print_highlight(\n",
" f\"Batch job details (check {i+1} / {max_checks}) // ID: {batch_details.id} // Status: {batch_details.status} // Created at: {batch_details.created_at} // Input file ID: {batch_details.input_file_id} // Output file ID: {batch_details.output_file_id}\"\n",
" )\n",
" print_highlight(\n",
" f\"<strong>Request counts: Total: {batch_details.request_counts.total} // Completed: {batch_details.request_counts.completed} // Failed: {batch_details.request_counts.failed}</strong>\"\n",
" )\n",
"\n",
" time.sleep(3)"
]
@@ -547,20 +824,114 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Created batch job with ID: batch_3d2dd881-ad84-465a-85ee-6d5991794e5e\n",
"Initial status: validating\n",
"Cancellation initiated. Status: cancelling\n",
"Current status: cancelled\n",
"Batch job successfully cancelled\n",
"Successfully cleaned up input file\n"
"[2024-10-28 02:03:23] INFO: 127.0.0.1:47360 - \"POST /v1/files HTTP/1.1\" 200 OK\n",
"[2024-10-28 02:03:23] INFO: 127.0.0.1:47360 - \"POST /v1/batches HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Created batch job with ID: batch_8a409f86-b8c7-4e29-9cc7-187d6d28df62</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Initial status: validating</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:23 TP0] Prefill batch. #new-seq: 44, #new-token: 44, #cached-token: 2376, cache hit rate: 60.81%, token usage: 0.01, #running-req: 0, #queue-req: 0\n",
"[2024-10-28 02:03:23 TP0] Prefill batch. #new-seq: 328, #new-token: 8192, #cached-token: 9824, cache hit rate: 56.49%, token usage: 0.01, #running-req: 44, #queue-req: 128\n",
"[2024-10-28 02:03:24 TP0] Prefill batch. #new-seq: 129, #new-token: 3864, #cached-token: 3231, cache hit rate: 54.15%, token usage: 0.05, #running-req: 371, #queue-req: 1\n",
"[2024-10-28 02:03:27 TP0] Decode batch. #running-req: 500, #token: 29025, token usage: 0.13, gen throughput (token/s): 1162.55, #queue-req: 0\n",
"[2024-10-28 02:03:31 TP0] Decode batch. #running-req: 500, #token: 49025, token usage: 0.23, gen throughput (token/s): 5606.35, #queue-req: 0\n",
"[2024-10-28 02:03:33] INFO: 127.0.0.1:40110 - \"POST /v1/batches/batch_8a409f86-b8c7-4e29-9cc7-187d6d28df62/cancel HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Cancellation initiated. Status: cancelling</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:36] INFO: 127.0.0.1:40110 - \"GET /v1/batches/batch_8a409f86-b8c7-4e29-9cc7-187d6d28df62 HTTP/1.1\" 200 OK\n"
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Current status: cancelled</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Batch job successfully cancelled</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:36] INFO: 127.0.0.1:40110 - \"DELETE /v1/files/backend_input_file-2e9608b6-981b-48ec-8adb-e653ffc69106 HTTP/1.1\" 200 OK\n"
]
]
},
{
"data": {
"text/html": [
"<strong style='color: #00008B;'>Successfully cleaned up input file</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -608,45 +979,57 @@
" completion_window=\"24h\",\n",
")\n",
"\n",
"print(f\"Created batch job with ID: {batch_job.id}\")\n",
"print(f\"Initial status: {batch_job.status}\")\n",
"print_highlight(f\"Created batch job with ID: {batch_job.id}\")\n",
"print_highlight(f\"Initial status: {batch_job.status}\")\n",
"\n",
"time.sleep(10)\n",
"\n",
"try:\n",
" cancelled_job = client.batches.cancel(batch_id=batch_job.id)\n",
" print(f\"Cancellation initiated. Status: {cancelled_job.status}\")\n",
" print_highlight(f\"Cancellation initiated. Status: {cancelled_job.status}\")\n",
" assert cancelled_job.status == \"cancelling\"\n",
"\n",
" # Monitor the cancellation process\n",
" while cancelled_job.status not in [\"failed\", \"cancelled\"]:\n",
" time.sleep(3)\n",
" cancelled_job = client.batches.retrieve(batch_job.id)\n",
" print(f\"Current status: {cancelled_job.status}\")\n",
|
||||
" print_highlight(f\"Current status: {cancelled_job.status}\")\n",
|
||||
"\n",
|
||||
" # Verify final status\n",
|
||||
" assert cancelled_job.status == \"cancelled\"\n",
|
||||
" print(\"Batch job successfully cancelled\")\n",
|
||||
" print_highlight(\"Batch job successfully cancelled\")\n",
|
||||
"\n",
|
||||
"except Exception as e:\n",
|
||||
" print(f\"Error during cancellation: {e}\")\n",
|
||||
" print_highlight(f\"Error during cancellation: {e}\")\n",
|
||||
" raise e\n",
|
||||
"\n",
|
||||
"finally:\n",
|
||||
" try:\n",
|
||||
" del_response = client.files.delete(uploaded_file.id)\n",
|
||||
" if del_response.deleted:\n",
|
||||
" print(\"Successfully cleaned up input file\")\n",
|
||||
" print_highlight(\"Successfully cleaned up input file\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error cleaning up: {e}\")\n",
|
||||
" print_highlight(f\"Error cleaning up: {e}\")\n",
|
||||
" raise e"
|
||||
]
|
||||
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 10,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-10-28 02:03:36] INFO: Shutting down\n",
"[2024-10-28 02:03:36] INFO: Waiting for application shutdown.\n",
"[2024-10-28 02:03:36] INFO: Application shutdown complete.\n",
"[2024-10-28 02:03:36] INFO: Finished server process [1185529]\n",
"W1028 02:03:37.084000 140231994889792 torch/_inductor/compile_worker/subproc_pool.py:126] SubprocPool unclean exit\n"
]
}
],
"source": [
"terminate_process(server_process)"
]