Co-authored-by: Ying Sheng <sqy1415@gmail.com>
This commit is contained in:
@@ -33,7 +33,7 @@ The `sampling_params` follows this format
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
# The maximum number of output tokens
|
# The maximum number of output tokens
|
||||||
max_new_tokens: int = 16,
|
max_new_tokens: int = 128,
|
||||||
# Stop when hitting any of the strings in this list.
|
# Stop when hitting any of the strings in this list.
|
||||||
stop: Optional[Union[str, List[str]]] = None,
|
stop: Optional[Union[str, List[str]]] = None,
|
||||||
# Sampling temperature
|
# Sampling temperature
|
||||||
@@ -90,7 +90,7 @@ response = requests.post(
|
|||||||
"text": "The capital of France is",
|
"text": "The capital of France is",
|
||||||
"sampling_params": {
|
"sampling_params": {
|
||||||
"temperature": 0,
|
"temperature": 0,
|
||||||
"max_new_tokens": 256,
|
"max_new_tokens": 32,
|
||||||
},
|
},
|
||||||
"stream": True,
|
"stream": True,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ class CompiledFunction:
|
|||||||
def run(
|
def run(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
max_new_tokens: int = 16,
|
max_new_tokens: int = 128,
|
||||||
stop: Union[str, List[str]] = (),
|
stop: Union[str, List[str]] = (),
|
||||||
temperature: float = 1.0,
|
temperature: float = 1.0,
|
||||||
top_p: float = 1.0,
|
top_p: float = 1.0,
|
||||||
@@ -155,7 +155,7 @@ class CompiledFunction:
|
|||||||
self,
|
self,
|
||||||
batch_kwargs,
|
batch_kwargs,
|
||||||
*,
|
*,
|
||||||
max_new_tokens: int = 16,
|
max_new_tokens: int = 128,
|
||||||
stop: Union[str, List[str]] = (),
|
stop: Union[str, List[str]] = (),
|
||||||
temperature: float = 1.0,
|
temperature: float = 1.0,
|
||||||
top_p: float = 1.0,
|
top_p: float = 1.0,
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ REGEX_STRING = r"\"[\w\d\s]*\"" # bugs with regex r"\".*\"" in interegular pkg
|
|||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class SglSamplingParams:
|
class SglSamplingParams:
|
||||||
max_new_tokens: int = 16
|
max_new_tokens: int = 128
|
||||||
stop: Union[str, List[str]] = ()
|
stop: Union[str, List[str]] = ()
|
||||||
temperature: float = 1.0
|
temperature: float = 1.0
|
||||||
top_p: float = 1.0
|
top_p: float = 1.0
|
||||||
@@ -140,7 +140,7 @@ class SglFunction:
|
|||||||
def run(
|
def run(
|
||||||
self,
|
self,
|
||||||
*args,
|
*args,
|
||||||
max_new_tokens: int = 16,
|
max_new_tokens: int = 128,
|
||||||
stop: Union[str, List[str]] = (),
|
stop: Union[str, List[str]] = (),
|
||||||
temperature: float = 1.0,
|
temperature: float = 1.0,
|
||||||
top_p: float = 1.0,
|
top_p: float = 1.0,
|
||||||
@@ -179,7 +179,7 @@ class SglFunction:
|
|||||||
self,
|
self,
|
||||||
batch_kwargs,
|
batch_kwargs,
|
||||||
*,
|
*,
|
||||||
max_new_tokens: int = 16,
|
max_new_tokens: int = 128,
|
||||||
stop: Union[str, List[str]] = (),
|
stop: Union[str, List[str]] = (),
|
||||||
temperature: float = 1.0,
|
temperature: float = 1.0,
|
||||||
top_p: float = 1.0,
|
top_p: float = 1.0,
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ _SAMPLING_EPS = 1e-6
|
|||||||
class SamplingParams:
|
class SamplingParams:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
max_new_tokens: int = 16,
|
max_new_tokens: int = 128,
|
||||||
stop: Optional[Union[str, List[str]]] = None,
|
stop: Optional[Union[str, List[str]]] = None,
|
||||||
temperature: float = 1.0,
|
temperature: float = 1.0,
|
||||||
top_p: float = 1.0,
|
top_p: float = 1.0,
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ curl http://localhost:30000/generate \
|
|||||||
-d '{
|
-d '{
|
||||||
"text": "Once upon a time,",
|
"text": "Once upon a time,",
|
||||||
"sampling_params": {
|
"sampling_params": {
|
||||||
"max_new_tokens": 16,
|
"max_new_tokens": 64,
|
||||||
"temperature": 0
|
"temperature": 0
|
||||||
}
|
}
|
||||||
}'
|
}'
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ async def test_concurrent(args):
|
|||||||
"image_data": "example_image.png",
|
"image_data": "example_image.png",
|
||||||
"sampling_params": {
|
"sampling_params": {
|
||||||
"temperature": 0,
|
"temperature": 0,
|
||||||
"max_new_tokens": 16,
|
"max_new_tokens": 64,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user