diff --git a/python/sglang/backend/runtime_endpoint.py b/python/sglang/backend/runtime_endpoint.py index 899ba09e2..aba69f00c 100644 --- a/python/sglang/backend/runtime_endpoint.py +++ b/python/sglang/backend/runtime_endpoint.py @@ -73,9 +73,11 @@ class RuntimeEndpoint(BaseBackend): assert res.status_code == 200 def commit_lazy_operations(self, s: StreamExecutor): + data = {"text": s.text_, "sampling_params": {"max_new_tokens": 0}} + self._add_images(s, data) res = http_request( self.base_url + "/generate", - json={"text": s.text_, "sampling_params": {"max_new_tokens": 0}}, + json=data, auth_token=self.auth_token, api_key=self.api_key, verify=self.verify, diff --git a/python/sglang/lang/interpreter.py b/python/sglang/lang/interpreter.py index 22b6106da..708df89aa 100644 --- a/python/sglang/lang/interpreter.py +++ b/python/sglang/lang/interpreter.py @@ -276,6 +276,7 @@ class StreamExecutor: exes[i].messages_ = list(self.messages_) exes[i].cur_role = self.cur_role exes[i].fork_start_text_pos = len(self.text_) + exes[i].images_ = list(self.images_) return exes