diff --git a/python/sglang/backend/runtime_endpoint.py b/python/sglang/backend/runtime_endpoint.py index bf5a58197..5c39a61b1 100644 --- a/python/sglang/backend/runtime_endpoint.py +++ b/python/sglang/backend/runtime_endpoint.py @@ -12,15 +12,16 @@ from sglang.utils import encode_image_base64, find_printable_text, http_request class RuntimeEndpoint(BaseBackend): - def __init__(self, base_url, auth_token=None): + def __init__(self, base_url, auth_token=None, verify=None): super().__init__() self.support_concate_and_append = True self.base_url = base_url self.auth_token = auth_token + self.verify = verify res = http_request( - self.base_url + "/get_model_info", auth_token=self.auth_token + self.base_url + "/get_model_info", auth_token=self.auth_token, verify=self.verify ) assert res.status_code == 200 self.model_info = res.json() @@ -40,6 +41,7 @@ class RuntimeEndpoint(BaseBackend): self.base_url + "/generate", json={"text": prefix_str, "sampling_params": {"max_new_tokens": 0}}, auth_token=self.auth_token, + verify=self.verify ) assert res.status_code == 200 @@ -48,6 +50,7 @@ class RuntimeEndpoint(BaseBackend): self.base_url + "/generate", json={"text": s.text_, "sampling_params": {"max_new_tokens": 0}}, auth_token=self.auth_token, + verify=self.verify ) assert res.status_code == 200 @@ -55,7 +58,7 @@ class RuntimeEndpoint(BaseBackend): data = {"text": s.text_, "sampling_params": {"max_new_tokens": 0}} self._add_images(s, data) res = http_request( - self.base_url + "/generate", json=data, auth_token=self.auth_token + self.base_url + "/generate", json=data, auth_token=self.auth_token, verify=self.verify ) assert res.status_code == 200 @@ -87,7 +90,7 @@ class RuntimeEndpoint(BaseBackend): self._add_images(s, data) res = http_request( - self.base_url + "/generate", json=data, auth_token=self.auth_token + self.base_url + "/generate", json=data, auth_token=self.auth_token, verify=self.verify ) obj = res.json() comp = obj["text"] @@ -126,6 +129,7 @@ class RuntimeEndpoint(BaseBackend): json=data, stream=True, auth_token=self.auth_token, + verify=self.verify ) pos = 0 @@ -157,7 +161,7 @@ class RuntimeEndpoint(BaseBackend): data = {"text": s.text_, "sampling_params": {"max_new_tokens": 0}} self._add_images(s, data) res = http_request( - self.base_url + "/generate", json=data, auth_token=self.auth_token + self.base_url + "/generate", json=data, auth_token=self.auth_token, verify=self.verify ) assert res.status_code == 200 prompt_len = res.json()["meta_info"]["prompt_tokens"] @@ -171,7 +175,7 @@ class RuntimeEndpoint(BaseBackend): } self._add_images(s, data) res = http_request( - self.base_url + "/generate", json=data, auth_token=self.auth_token + self.base_url + "/generate", json=data, auth_token=self.auth_token, verify=self.verify ) assert res.status_code == 200 obj = res.json() @@ -188,6 +192,7 @@ class RuntimeEndpoint(BaseBackend): self.base_url + "/concate_and_append_request", json={"src_rids": src_rids, "dst_rid": dst_rid}, auth_token=self.auth_token, + verify=self.verify ) assert res.status_code == 200 diff --git a/python/sglang/utils.py b/python/sglang/utils.py index 3c55797fc..4bcb0fb37 100644 --- a/python/sglang/utils.py +++ b/python/sglang/utils.py @@ -88,16 +88,16 @@ class HttpResponse: return self.resp.status -def http_request(url, json=None, stream=False, auth_token=None): +def http_request(url, json=None, stream=False, auth_token=None, verify=None): """A faster version of requests.post with low-level urllib API.""" if stream: if auth_token is None: - return requests.post(url, json=json, stream=True) + return requests.post(url, json=json, stream=True, verify=verify) headers = { "Content-Type": "application/json", "Authentication": f"Bearer {auth_token}", } - return requests.post(url, json=json, stream=True, headers=headers) + return requests.post(url, json=json, stream=True, headers=headers, verify=verify) else: req = urllib.request.Request(url) req.add_header("Content-Type", "application/json; charset=utf-8") @@ -107,7 +107,7 @@ def http_request(url, json=None, stream=False, auth_token=None): data = None else: data = bytes(dumps(json), encoding="utf-8") - resp = urllib.request.urlopen(req, data=data) + resp = urllib.request.urlopen(req, data=data, cafile=verify) return HttpResponse(resp)