[router][bugfix] Fix input_logprobs handling with None value and logprob_start_len = -1 (#11113)
This commit is contained in:
@@ -175,13 +175,13 @@ message GenerateStreamChunk {
|
||||
int32 cached_tokens = 4;
|
||||
|
||||
// Output logprobs (if requested) - incremental for streaming
|
||||
LogProbs output_logprobs = 5;
|
||||
OutputLogProbs output_logprobs = 5;
|
||||
|
||||
// Hidden states (if requested)
|
||||
repeated float hidden_states = 6;
|
||||
|
||||
// Input logprobs (if requested) - only in first chunk
|
||||
LogProbs input_logprobs = 7;
|
||||
InputLogProbs input_logprobs = 7;
|
||||
}
|
||||
|
||||
message GenerateComplete {
|
||||
@@ -197,7 +197,7 @@ message GenerateComplete {
|
||||
int32 cached_tokens = 5;
|
||||
|
||||
// Output logprobs if requested (cumulative)
|
||||
LogProbs output_logprobs = 6;
|
||||
OutputLogProbs output_logprobs = 6;
|
||||
|
||||
// All hidden states if requested
|
||||
repeated HiddenStates all_hidden_states = 7;
|
||||
@@ -209,7 +209,7 @@ message GenerateComplete {
|
||||
}
|
||||
|
||||
// Input logprobs if requested (for prompt tokens)
|
||||
LogProbs input_logprobs = 10;
|
||||
InputLogProbs input_logprobs = 10;
|
||||
}
|
||||
|
||||
message GenerateError {
|
||||
@@ -218,7 +218,8 @@ message GenerateError {
|
||||
string details = 3;
|
||||
}
|
||||
|
||||
message LogProbs {
|
||||
// Output logprobs - all values are present (no None)
|
||||
message OutputLogProbs {
|
||||
repeated float token_logprobs = 1;
|
||||
repeated int32 token_ids = 2;
|
||||
|
||||
@@ -226,6 +227,20 @@ message LogProbs {
|
||||
repeated TopLogProbs top_logprobs = 3;
|
||||
}
|
||||
|
||||
// Input logprobs - first token has no logprob (None)
|
||||
// Log-probability payload for prompt (input) tokens. It is the type of the
// `input_logprobs` field on GenerateStreamChunk (field 7) and
// GenerateComplete (field 10). Unlike OutputLogProbs, which carries bare
// `repeated float` values, each entry here is wrapped so that a missing
// logprob can be represented.
message InputLogProbs {
|
||||
// One entry per input token, each an InputTokenLogProb wrapper instead of a
// bare float: the first input token has no logprob, and a plain
// `repeated float` element cannot express "no value".
repeated InputTokenLogProb token_logprobs = 1;
|
||||
// Token ids for the input tokens.
// NOTE(review): presumably parallel to token_logprobs (same length and
// order) - confirm against the producer.
repeated int32 token_ids = 2;
|
||||
|
||||
// Top logprobs at each position
|
||||
repeated TopLogProbs top_logprobs = 3;
|
||||
}
|
||||
|
||||
// Wrapper to represent optional logprob (first input token has no logprob)
|
||||
// Single-field wrapper used as the element type of
// InputLogProbs.token_logprobs, so an individual entry can be "absent".
message InputTokenLogProb {
|
||||
// Logprob of one input token. Left unset when the token has no logprob
// (the first input token). Consumers should test field presence rather
// than read the default, since 0.0 is itself a valid log-probability.
optional float value = 1;
|
||||
}
|
||||
|
||||
message TopLogProbs {
|
||||
repeated float values = 1;
|
||||
repeated int32 token_ids = 2;
|
||||
|
||||
@@ -1239,7 +1239,7 @@ impl GrpcRouter {
|
||||
/// Note: Always decodes with skip_special_tokens=false to show actual tokens generated
|
||||
fn convert_proto_to_openai_logprobs(
|
||||
&self,
|
||||
proto_logprobs: &proto::LogProbs,
|
||||
proto_logprobs: &proto::OutputLogProbs,
|
||||
) -> Result<crate::protocols::spec::ChatLogProbs, String> {
|
||||
let mut content_items = Vec::new();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user