Use Anthropic messages API (#304)

2024-03-22 22:23:31 +02:00
parent 08df63a6f8
commit e57f079275
6 changed files with 25 additions and 19 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -22,7 +22,7 @@ srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn",
       "zmq", "vllm>=0.3.3", "interegular", "lark", "numba",
       "pydantic", "referencing", "diskcache", "cloudpickle", "pillow", "outlines>=0.0.27"]
 openai = ["openai>=1.0", "numpy"]
-anthropic = ["anthropic", "numpy"]
+anthropic = ["anthropic>=0.20.0", "numpy"]
 all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]"]

 [project.urls]
--- a/python/sglang/backend/anthropic.py
+++ b/python/sglang/backend/anthropic.py
@@ -30,13 +30,17 @@ class Anthropic(BaseBackend):
        s: StreamExecutor,
        sampling_params: SglSamplingParams,
    ):
-        prompt = s.text_
-        ret = anthropic.Anthropic().completions.create(
+        if s.messages_:
+            messages = s.messages_
+        else:
+            messages = [{"role": "user", "content": s.text_}]
+
+        ret = anthropic.Anthropic().messages.create(
            model=self.model_name,
-            prompt=prompt,
+            messages=messages,
            **sampling_params.to_anthropic_kwargs(),
        )
-        comp = ret.completion
+        comp = ret.content[0].text

        return comp, {}

@@ -45,13 +49,15 @@ class Anthropic(BaseBackend):
        s: StreamExecutor,
        sampling_params: SglSamplingParams,
    ):
-        prompt = s.text_
-        generator = anthropic.Anthropic().completions.create(
-            model=self.model_name,
-            prompt=prompt,
-            stream=True,
-            **sampling_params.to_anthropic_kwargs(),
-        )
+        if s.messages_:
+            messages = s.messages_
+        else:
+            messages = [{"role": "user", "content": s.text_}]

-        for ret in generator:
-            yield ret.completion, {}
+        with anthropic.Anthropic().messages.stream(
+            model=self.model_name,
+            messages=messages,
+            **sampling_params.to_anthropic_kwargs(),
+        ) as stream:
+            for text in stream.text_stream:
+                yield text, {}
--- a/python/sglang/lang/ir.py
+++ b/python/sglang/lang/ir.py
@@ -73,7 +73,7 @@ class SglSamplingParams:
                "Regular expression is not supported in the Anthropic backend."
            )
        return {
-            "max_tokens_to_sample": self.max_new_tokens,
+            "max_tokens": self.max_new_tokens,
            "stop_sequences": (
                self.stop if isinstance(self.stop, (list, tuple)) else [self.stop]
            ),