[router][grpc] Fix proto3 default value mismatches and clean up unused fields (#11283)
This commit is contained in:
@@ -202,6 +202,7 @@ impl SglangSchedulerClient {
|
||||
stop: stop_sequences,
|
||||
stop_token_ids: request.stop_token_ids.clone().unwrap_or_default(),
|
||||
skip_special_tokens,
|
||||
spaces_between_special_tokens: true, // Default from Python SamplingParams
|
||||
ignore_eos: request.ignore_eos,
|
||||
no_stop_trim: request.no_stop_trim,
|
||||
n: request.n.unwrap_or(1) as i32,
|
||||
@@ -301,6 +302,8 @@ impl SglangSchedulerClient {
|
||||
top_k: -1,
|
||||
repetition_penalty: 1.0,
|
||||
n: 1,
|
||||
skip_special_tokens: true,
|
||||
spaces_between_special_tokens: true,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -444,10 +447,24 @@ mod tests {
|
||||
/// Pins the raw values of a default-constructed `proto::SamplingParams`:
/// the asserted numeric fields are zero, the bool flags are `false`,
/// `max_new_tokens` and `stream_interval` are `None`, and `stop` is empty.
///
/// These are the generated defaults only; the test makes no claim that they
/// are sensible sampling settings.
#[test]
|
||||
fn test_sampling_params_defaults() {
|
||||
let params = proto::SamplingParams::default();
|
||||
// Numeric fields have proto defaults (0)
|
||||
assert_eq!(params.temperature, 0.0);
|
||||
// NOTE(review): `max_new_tokens` is asserted again in the "Optional int
// fields" group below; this first occurrence may be a removed line from
// the diff view rather than part of the current test — confirm.
assert_eq!(params.max_new_tokens, None);
|
||||
assert_eq!(params.top_p, 0.0);
|
||||
assert_eq!(params.top_k, 0);
|
||||
assert_eq!(params.repetition_penalty, 0.0);
|
||||
assert_eq!(params.n, 0);
|
||||
// Bool fields have proto defaults (false)
|
||||
assert!(!params.skip_special_tokens);
|
||||
assert!(!params.spaces_between_special_tokens);
|
||||
assert!(!params.ignore_eos);
|
||||
assert!(!params.no_stop_trim);
|
||||
// Optional int fields should be None
|
||||
assert_eq!(params.max_new_tokens, None);
|
||||
assert_eq!(params.stream_interval, None);
|
||||
// Other non-optional fields
|
||||
assert_eq!(params.min_p, 0.0);
|
||||
assert_eq!(params.frequency_penalty, 0.0);
|
||||
assert_eq!(params.presence_penalty, 0.0);
|
||||
assert!(params.stop.is_empty());
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,11 @@ service SglangScheduler {
|
||||
// =====================
|
||||
|
||||
// Sampling parameters matching SGLang's SamplingParams
|
||||
//
|
||||
// IMPORTANT: Do not use SamplingParams::default() directly!
|
||||
// The proto3 defaults (0 for numeric fields) do NOT match the semantic defaults
|
||||
// (temperature=1.0, top_p=1.0, top_k=-1, etc.). Always construct with explicit values
|
||||
// or use the conversion functions in sglang_scheduler.rs / grpc_server.py.
|
||||
message SamplingParams {
|
||||
float temperature = 1;
|
||||
float top_p = 2;
|
||||
@@ -50,24 +55,18 @@ message SamplingParams {
|
||||
string structural_tag = 16;
|
||||
}
|
||||
|
||||
// LoRA adapter
|
||||
string lora_path = 17;
|
||||
|
||||
// Speculative decoding
|
||||
int32 n = 18; // Number of samples
|
||||
|
||||
// Token healing
|
||||
bool token_healing = 19;
|
||||
int32 n = 17; // Number of samples
|
||||
|
||||
// Additional parameters
|
||||
int32 min_new_tokens = 20;
|
||||
bool ignore_eos = 21;
|
||||
bool no_stop_trim = 22;
|
||||
int32 stream_interval = 23;
|
||||
map<string, float> logit_bias = 24;
|
||||
int32 min_new_tokens = 18;
|
||||
bool ignore_eos = 19;
|
||||
bool no_stop_trim = 20;
|
||||
optional int32 stream_interval = 21;
|
||||
map<string, float> logit_bias = 22;
|
||||
|
||||
// Custom parameters for extensibility
|
||||
google.protobuf.Struct custom_params = 25;
|
||||
google.protobuf.Struct custom_params = 23;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user