router: Fix constraint proto and build_constraint in grpc router (#10881)
This commit is contained in:
@@ -438,6 +438,7 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
regex = None
|
||||
json_schema = None
|
||||
ebnf_grammar = None
|
||||
structural_tag = None
|
||||
|
||||
if grpc_params.HasField("regex"):
|
||||
regex = grpc_params.regex
|
||||
@@ -445,6 +446,8 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
json_schema = grpc_params.json_schema
|
||||
elif grpc_params.HasField("ebnf_grammar"):
|
||||
ebnf_grammar = grpc_params.ebnf_grammar
|
||||
elif grpc_params.HasField("structural_tag"):
|
||||
structural_tag = grpc_params.structural_tag
|
||||
|
||||
return SGLSamplingParams(
|
||||
temperature=grpc_params.temperature or 1.0,
|
||||
@@ -465,6 +468,7 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
|
||||
regex=regex,
|
||||
json_schema=json_schema,
|
||||
ebnf=ebnf_grammar,
|
||||
structural_tag=structural_tag,
|
||||
n=grpc_params.n or 1,
|
||||
ignore_eos=grpc_params.ignore_eos,
|
||||
)
|
||||
|
||||
@@ -47,24 +47,24 @@ message SamplingParams {
|
||||
string regex = 13;
|
||||
string json_schema = 14;
|
||||
string ebnf_grammar = 15;
|
||||
string structural_tag = 16;
|
||||
}
|
||||
|
||||
// LoRA adapter
|
||||
string lora_path = 16;
|
||||
string lora_path = 17;
|
||||
|
||||
// Speculative decoding
|
||||
int32 n = 17; // Number of samples
|
||||
int32 n = 18; // Number of samples
|
||||
|
||||
// Token healing
|
||||
bool token_healing = 18;
|
||||
bool token_healing = 19;
|
||||
|
||||
// Additional parameters
|
||||
int32 min_new_tokens = 19;
|
||||
bool ignore_eos = 20;
|
||||
bool no_stop_trim = 21;
|
||||
int32 stream_interval = 22;
|
||||
map<string, float> logit_bias = 23;
|
||||
string structural_tag = 24;
|
||||
int32 min_new_tokens = 20;
|
||||
bool ignore_eos = 21;
|
||||
bool no_stop_trim = 22;
|
||||
int32 stream_interval = 23;
|
||||
map<string, float> logit_bias = 24;
|
||||
|
||||
// Custom parameters for extensibility
|
||||
google.protobuf.Struct custom_params = 25;
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -12,7 +12,7 @@ from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union
|
||||
DESCRIPTOR: _descriptor.FileDescriptor
|
||||
|
||||
class SamplingParams(_message.Message):
|
||||
__slots__ = ("temperature", "top_p", "top_k", "min_p", "frequency_penalty", "presence_penalty", "repetition_penalty", "max_new_tokens", "stop", "stop_token_ids", "skip_special_tokens", "spaces_between_special_tokens", "regex", "json_schema", "ebnf_grammar", "lora_path", "n", "token_healing", "min_new_tokens", "ignore_eos", "no_stop_trim", "stream_interval", "logit_bias", "structural_tag", "custom_params")
|
||||
__slots__ = ("temperature", "top_p", "top_k", "min_p", "frequency_penalty", "presence_penalty", "repetition_penalty", "max_new_tokens", "stop", "stop_token_ids", "skip_special_tokens", "spaces_between_special_tokens", "regex", "json_schema", "ebnf_grammar", "structural_tag", "lora_path", "n", "token_healing", "min_new_tokens", "ignore_eos", "no_stop_trim", "stream_interval", "logit_bias", "custom_params")
|
||||
class LogitBiasEntry(_message.Message):
|
||||
__slots__ = ("key", "value")
|
||||
KEY_FIELD_NUMBER: _ClassVar[int]
|
||||
@@ -35,6 +35,7 @@ class SamplingParams(_message.Message):
|
||||
REGEX_FIELD_NUMBER: _ClassVar[int]
|
||||
JSON_SCHEMA_FIELD_NUMBER: _ClassVar[int]
|
||||
EBNF_GRAMMAR_FIELD_NUMBER: _ClassVar[int]
|
||||
STRUCTURAL_TAG_FIELD_NUMBER: _ClassVar[int]
|
||||
LORA_PATH_FIELD_NUMBER: _ClassVar[int]
|
||||
N_FIELD_NUMBER: _ClassVar[int]
|
||||
TOKEN_HEALING_FIELD_NUMBER: _ClassVar[int]
|
||||
@@ -43,7 +44,6 @@ class SamplingParams(_message.Message):
|
||||
NO_STOP_TRIM_FIELD_NUMBER: _ClassVar[int]
|
||||
STREAM_INTERVAL_FIELD_NUMBER: _ClassVar[int]
|
||||
LOGIT_BIAS_FIELD_NUMBER: _ClassVar[int]
|
||||
STRUCTURAL_TAG_FIELD_NUMBER: _ClassVar[int]
|
||||
CUSTOM_PARAMS_FIELD_NUMBER: _ClassVar[int]
|
||||
temperature: float
|
||||
top_p: float
|
||||
@@ -60,6 +60,7 @@ class SamplingParams(_message.Message):
|
||||
regex: str
|
||||
json_schema: str
|
||||
ebnf_grammar: str
|
||||
structural_tag: str
|
||||
lora_path: str
|
||||
n: int
|
||||
token_healing: bool
|
||||
@@ -68,9 +69,8 @@ class SamplingParams(_message.Message):
|
||||
no_stop_trim: bool
|
||||
stream_interval: int
|
||||
logit_bias: _containers.ScalarMap[str, float]
|
||||
structural_tag: str
|
||||
custom_params: _struct_pb2.Struct
|
||||
def __init__(self, temperature: _Optional[float] = ..., top_p: _Optional[float] = ..., top_k: _Optional[int] = ..., min_p: _Optional[float] = ..., frequency_penalty: _Optional[float] = ..., presence_penalty: _Optional[float] = ..., repetition_penalty: _Optional[float] = ..., max_new_tokens: _Optional[int] = ..., stop: _Optional[_Iterable[str]] = ..., stop_token_ids: _Optional[_Iterable[int]] = ..., skip_special_tokens: bool = ..., spaces_between_special_tokens: bool = ..., regex: _Optional[str] = ..., json_schema: _Optional[str] = ..., ebnf_grammar: _Optional[str] = ..., lora_path: _Optional[str] = ..., n: _Optional[int] = ..., token_healing: bool = ..., min_new_tokens: _Optional[int] = ..., ignore_eos: bool = ..., no_stop_trim: bool = ..., stream_interval: _Optional[int] = ..., logit_bias: _Optional[_Mapping[str, float]] = ..., structural_tag: _Optional[str] = ..., custom_params: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ...) -> None: ...
|
||||
def __init__(self, temperature: _Optional[float] = ..., top_p: _Optional[float] = ..., top_k: _Optional[int] = ..., min_p: _Optional[float] = ..., frequency_penalty: _Optional[float] = ..., presence_penalty: _Optional[float] = ..., repetition_penalty: _Optional[float] = ..., max_new_tokens: _Optional[int] = ..., stop: _Optional[_Iterable[str]] = ..., stop_token_ids: _Optional[_Iterable[int]] = ..., skip_special_tokens: bool = ..., spaces_between_special_tokens: bool = ..., regex: _Optional[str] = ..., json_schema: _Optional[str] = ..., ebnf_grammar: _Optional[str] = ..., structural_tag: _Optional[str] = ..., lora_path: _Optional[str] = ..., n: _Optional[int] = ..., token_healing: bool = ..., min_new_tokens: _Optional[int] = ..., ignore_eos: bool = ..., no_stop_trim: bool = ..., stream_interval: _Optional[int] = ..., logit_bias: _Optional[_Mapping[str, float]] = ..., custom_params: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ...) -> None: ...
|
||||
|
||||
class DisaggregatedParams(_message.Message):
|
||||
__slots__ = ("bootstrap_host", "bootstrap_port", "bootstrap_room")
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
# This file is auto-generated. Do not edit manually.
|
||||
# Regenerate with: python compile_proto.py
|
||||
|
||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
||||
"""Client and server classes corresponding to protobuf-defined services."""
|
||||
import grpc
|
||||
|
||||
Reference in New Issue
Block a user