[router][grpc] Add helper functions for decoder in router.rs and fix specs (#10971)

This commit is contained in:
Chang Su
2025-09-26 17:10:45 -07:00
committed by GitHub
parent 2bdaf482f9
commit 0c3db88978
6 changed files with 309 additions and 108 deletions

View File

@@ -36,9 +36,9 @@ message SamplingParams {
float presence_penalty = 6;
float repetition_penalty = 7;
int32 max_new_tokens = 8;
optional int32 max_new_tokens = 8;
repeated string stop = 9;
repeated int32 stop_token_ids = 10;
repeated uint32 stop_token_ids = 10;
bool skip_special_tokens = 11;
bool spaces_between_special_tokens = 12;
@@ -98,7 +98,7 @@ message GenerateRequest {
bool return_logprob = 5;
int32 logprob_start_len = 6;
int32 top_logprobs_num = 7;
repeated int32 token_ids_logprob = 8;
repeated uint32 token_ids_logprob = 8;
bool return_hidden_states = 9;
// For disaggregated serving
@@ -129,7 +129,7 @@ message GenerateRequest {
message TokenizedInput {
string original_text = 1; // For reference
repeated int32 input_ids = 2;
repeated uint32 input_ids = 2;
}
message MultimodalInputs {
@@ -167,7 +167,7 @@ message GenerateResponse {
message GenerateStreamChunk {
// Generated tokens (incremental chunk)
repeated int32 token_ids = 1;
repeated uint32 token_ids = 1;
// Cumulative counts
int32 prompt_tokens = 2;
@@ -183,7 +183,7 @@ message GenerateStreamChunk {
message GenerateComplete {
// Final output
repeated int32 output_ids = 1;
repeated uint32 output_ids = 1;
// Finish reason
enum FinishReason {

File diff suppressed because one or more lines are too long