[2/2] Support deterministic inference for temperature > 0 (#10678)

Co-authored-by: Baizhou Zhang <sobereddiezhang@gmail.com>
Co-authored-by: hebiao064 <hebiaobuaa@gmail.com>
This commit is contained in:
Qiaolin Yu
2025-09-21 19:36:08 -07:00
committed by GitHub
parent 86527a4799
commit e2ac7888b8
12 changed files with 117 additions and 11 deletions

View File

@@ -367,6 +367,10 @@ pub struct ChatCompletionRequest {
/// Return model hidden states
#[serde(default)]
pub return_hidden_states: bool,
/// Sampling seed for deterministic outputs
#[serde(skip_serializing_if = "Option::is_none")]
pub sampling_seed: Option<u64>,
}
impl GenerationRequest for ChatCompletionRequest {
@@ -608,6 +612,10 @@ pub struct CompletionRequest {
#[serde(default)]
pub return_hidden_states: bool,
/// Sampling seed for deterministic outputs
#[serde(skip_serializing_if = "Option::is_none")]
pub sampling_seed: Option<u64>,
/// Additional fields including bootstrap info for PD routing
#[serde(flatten)]
pub other: serde_json::Map<String, serde_json::Value>,
@@ -1749,6 +1757,8 @@ pub struct SamplingParams {
pub stop_token_ids: Option<Vec<i32>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub no_stop_trim: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub sampling_seed: Option<u64>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]