Use Anthropic messages API (#304)
This commit is contained in:
@@ -22,7 +22,7 @@ srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn",
|
||||
"zmq", "vllm>=0.3.3", "interegular", "lark", "numba",
|
||||
"pydantic", "referencing", "diskcache", "cloudpickle", "pillow", "outlines>=0.0.27"]
|
||||
openai = ["openai>=1.0", "numpy"]
|
||||
-anthropic = ["anthropic", "numpy"]
+anthropic = ["anthropic>=0.20.0", "numpy"]
|
||||
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]"]
|
||||
|
||||
[project.urls]
|
||||
|
||||
@@ -30,13 +30,17 @@ class Anthropic(BaseBackend):
|
||||
s: StreamExecutor,
|
||||
sampling_params: SglSamplingParams,
|
||||
):
|
||||
-prompt = s.text_
-ret = anthropic.Anthropic().completions.create(
+if s.messages_:
+    messages = s.messages_
+else:
+    messages = [{"role": "user", "content": s.text_}]
+
+ret = anthropic.Anthropic().messages.create(
|
||||
model=self.model_name,
|
||||
-prompt=prompt,
+messages=messages,
|
||||
**sampling_params.to_anthropic_kwargs(),
|
||||
)
|
||||
-comp = ret.completion
+comp = ret.content[0].text
|
||||
|
||||
return comp, {}
|
||||
|
||||
@@ -45,13 +49,15 @@ class Anthropic(BaseBackend):
|
||||
s: StreamExecutor,
|
||||
sampling_params: SglSamplingParams,
|
||||
):
|
||||
-prompt = s.text_
-generator = anthropic.Anthropic().completions.create(
-    model=self.model_name,
-    prompt=prompt,
-    stream=True,
-    **sampling_params.to_anthropic_kwargs(),
-)
+if s.messages_:
+    messages = s.messages_
+else:
+    messages = [{"role": "user", "content": s.text_}]
|
||||
|
||||
-for ret in generator:
-    yield ret.completion, {}
+with anthropic.Anthropic().messages.stream(
+    model=self.model_name,
+    messages=messages,
+    **sampling_params.to_anthropic_kwargs(),
+) as stream:
+    for text in stream.text_stream:
+        yield text, {}
|
||||
|
||||
@@ -73,7 +73,7 @@ class SglSamplingParams:
|
||||
"Regular expression is not supported in the Anthropic backend."
|
||||
)
|
||||
return {
-"max_tokens_to_sample": self.max_new_tokens,
+"max_tokens": self.max_new_tokens,
|
||||
"stop_sequences": (
|
||||
self.stop if isinstance(self.stop, (list, tuple)) else [self.stop]
|
||||
),
|
||||
|
||||
Reference in New Issue
Block a user