[router][grpc] Support streaming for v1/chat/completions (#11179)

This commit is contained in:
Chang Su
2025-10-02 14:35:16 -07:00
committed by GitHub
parent 0618ad6dd5
commit 963175d5c0
30 changed files with 912 additions and 228 deletions

View File

@@ -179,6 +179,9 @@ message GenerateStreamChunk {
// Input logprobs (if requested); only included in the first chunk
InputLogProbs input_logprobs = 7;
// Index used to order results when n > 1 (parallel request multiplexing)
uint32 index = 8;
}
message GenerateComplete {
@@ -207,6 +210,9 @@ message GenerateComplete {
// Input logprobs if requested (for prompt tokens)
InputLogProbs input_logprobs = 10;
// Index used to order results when n > 1 (parallel request multiplexing)
uint32 index = 11;
}
message GenerateError {