From e2bf732bc3dda44f800caa1aa3d2786ee66d93d1 Mon Sep 17 00:00:00 2001
From: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Date: Tue, 6 Feb 2024 12:38:41 +0800
Subject: [PATCH] add openai error handler with retry and logger (#148)

---
Review notes (free text between the "---" line and the diffstat is not
part of the commit message or of the diff):

* Both retry handlers log "Waiting 5 seconds..." and call time.sleep(5)
  before testing `attempt == retries - 1`, so the last failed attempt
  still sleeps for 5 seconds and only then re-raises.
* With retries=0 the `for attempt in range(retries)` body never runs:
  openai_completion reaches `return comp` with `comp` never assigned, and
  openai_completion_stream finishes without yielding anything.
* openai_completion_stream retries by creating a new generator from the
  start of the request, so any chunks already yielded before a mid-stream
  error are yielded again on the next attempt.
* The `except Exception` handlers log the label "RuntimeError" for every
  exception type that reaches them, not only RuntimeError.
* `kwargs.pop("stop")` is now guarded by `"stop" in kwargs`, so callers
  that omit `stop` no longer hit a KeyError on the chat path.

 python/sglang/backend/openai.py | 89 ++++++++++++++++++++-------------
 1 file changed, 55 insertions(+), 34 deletions(-)

diff --git a/python/sglang/backend/openai.py b/python/sglang/backend/openai.py
index 3f713cae5..e62c4a61d 100644
--- a/python/sglang/backend/openai.py
+++ b/python/sglang/backend/openai.py
@@ -1,3 +1,5 @@
+import logging
+import time
 from typing import Callable, List, Optional, Union
 
 import numpy as np
@@ -13,6 +15,9 @@ except ImportError as e:
     openai = tiktoken = e
 
 
+logger = logging.getLogger("openai")
+
+
 def create_logit_bias_int(tokenizer):
     """Get logit bias for integer numbers."""
     int_token_ids = []
@@ -199,42 +204,58 @@ class OpenAI(BaseBackend):
         return decision, scores, scores
 
 
-def openai_completion(client, is_chat=None, prompt=None, **kwargs):
-    try:
-        if is_chat:
-            if kwargs["stop"] is None:
-                kwargs.pop("stop")
-            ret = client.chat.completions.create(messages=prompt, **kwargs)
-            comp = ret.choices[0].message.content
-        else:
-            ret = client.completions.create(prompt=prompt, **kwargs)
-            if isinstance(prompt, (list, tuple)):
-                comp = [c.text for c in ret.choices]
+def openai_completion(client, retries=3, is_chat=None, prompt=None, **kwargs):
+    for attempt in range(retries):
+        try:
+            if is_chat:
+                if "stop" in kwargs and kwargs["stop"] is None:
+                    kwargs.pop("stop")
+                ret = client.chat.completions.create(messages=prompt, **kwargs)
+                comp = ret.choices[0].message.content
             else:
-                comp = ret.choices[0].text
-    except openai.OpenAIError as e:
-        print(f"OpenAI Error: {e}")
-        raise e
+                ret = client.completions.create(prompt=prompt, **kwargs)
+                if isinstance(prompt, (list, tuple)):
+                    comp = [c.text for c in ret.choices]
+                else:
+                    comp = ret.choices[0].text
+            break
+        except (openai.APIError, openai.APIConnectionError, openai.RateLimitError) as e:
+            logger.error(f"OpenAI Error: {e}. Waiting 5 seconds...")
+            time.sleep(5)
+            if attempt == retries - 1:
+                raise e
+        except Exception as e:
+            logger.error(f"RuntimeError {e}.")
+            raise e
 
     return comp
 
 
-def openai_completion_stream(client, is_chat=None, prompt=None, **kwargs):
-    try:
-        if is_chat:
-            if kwargs["stop"] is None:
-                kwargs.pop("stop")
-            generator = client.chat.completions.create(
-                messages=prompt, stream=True, **kwargs
-            )
-            for ret in generator:
-                content = ret.choices[0].delta.content
-                yield content or "", {}
-        else:
-            generator = client.completions.create(prompt=prompt, stream=True, **kwargs)
-            for ret in generator:
-                content = ret.choices[0].text
-                yield content or "", {}
-    except openai.OpenAIError as e:
-        print(f"OpenAI Error: {e}")
-        raise e
+def openai_completion_stream(client, retries=3, is_chat=None, prompt=None, **kwargs):
+    for attempt in range(retries):
+        try:
+            if is_chat:
+                if "stop" in kwargs and kwargs["stop"] is None:
+                    kwargs.pop("stop")
+                generator = client.chat.completions.create(
+                    messages=prompt, stream=True, **kwargs
+                )
+                for ret in generator:
+                    content = ret.choices[0].delta.content
+                    yield content or "", {}
+            else:
+                generator = client.completions.create(
+                    prompt=prompt, stream=True, **kwargs
+                )
+                for ret in generator:
+                    content = ret.choices[0].text
+                    yield content or "", {}
+            break
+        except (openai.APIError, openai.APIConnectionError, openai.RateLimitError) as e:
+            logger.error(f"OpenAI Error: {e}. Waiting 5 seconds...")
+            time.sleep(5)
+            if attempt == retries - 1:
+                raise e
+        except Exception as e:
+            logger.error(f"RuntimeError {e}.")
+            raise e