[router] add py binding and readme for openai router and history backend (#11453)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -29,9 +29,69 @@ impl ConfigValidator {
|
||||
Self::validate_retry(&retry_cfg)?;
|
||||
Self::validate_circuit_breaker(&cb_cfg)?;
|
||||
|
||||
if config.history_backend == HistoryBackend::Oracle && config.oracle.is_none() {
|
||||
// Validate Oracle configuration if enabled
|
||||
if config.history_backend == HistoryBackend::Oracle {
|
||||
if config.oracle.is_none() {
|
||||
return Err(ConfigError::MissingRequired {
|
||||
field: "oracle".to_string(),
|
||||
});
|
||||
}
|
||||
// Validate Oracle configuration details
|
||||
if let Some(oracle) = &config.oracle {
|
||||
Self::validate_oracle(oracle)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate Oracle configuration
|
||||
fn validate_oracle(oracle: &OracleConfig) -> ConfigResult<()> {
|
||||
// Validate username is not empty
|
||||
if oracle.username.is_empty() {
|
||||
return Err(ConfigError::MissingRequired {
|
||||
field: "oracle".to_string(),
|
||||
field: "oracle.username".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate password is not empty
|
||||
if oracle.password.is_empty() {
|
||||
return Err(ConfigError::MissingRequired {
|
||||
field: "oracle.password".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate connect_descriptor is not empty
|
||||
if oracle.connect_descriptor.is_empty() {
|
||||
return Err(ConfigError::MissingRequired {
|
||||
field: "oracle_dsn or oracle_tns_alias".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate pool_min is at least 1
|
||||
if oracle.pool_min < 1 {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "oracle.pool_min".to_string(),
|
||||
value: oracle.pool_min.to_string(),
|
||||
reason: "Must be at least 1".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate pool_max is greater than or equal to pool_min
|
||||
if oracle.pool_max < oracle.pool_min {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "oracle.pool_max".to_string(),
|
||||
value: oracle.pool_max.to_string(),
|
||||
reason: "Must be >= oracle.pool_min".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Validate pool_timeout_secs is greater than 0
|
||||
if oracle.pool_timeout_secs == 0 {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "oracle.pool_timeout_secs".to_string(),
|
||||
value: oracle.pool_timeout_secs.to_string(),
|
||||
reason: "Must be > 0".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -30,6 +30,113 @@ pub enum PolicyType {
|
||||
PowerOfTwo,
|
||||
}
|
||||
|
||||
#[pyclass(eq)]
|
||||
#[derive(Clone, PartialEq, Debug)]
|
||||
pub enum BackendType {
|
||||
Sglang,
|
||||
Openai,
|
||||
}
|
||||
|
||||
#[pyclass(eq)]
|
||||
#[derive(Clone, PartialEq, Debug)]
|
||||
pub enum HistoryBackendType {
|
||||
Memory,
|
||||
None,
|
||||
Oracle,
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub struct PyOracleConfig {
|
||||
#[pyo3(get, set)]
|
||||
pub wallet_path: Option<String>,
|
||||
#[pyo3(get, set)]
|
||||
pub connect_descriptor: Option<String>,
|
||||
#[pyo3(get, set)]
|
||||
pub username: Option<String>,
|
||||
#[pyo3(get, set)]
|
||||
pub password: Option<String>,
|
||||
#[pyo3(get, set)]
|
||||
pub pool_min: usize,
|
||||
#[pyo3(get, set)]
|
||||
pub pool_max: usize,
|
||||
#[pyo3(get, set)]
|
||||
pub pool_timeout_secs: u64,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for PyOracleConfig {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("PyOracleConfig")
|
||||
.field("wallet_path", &self.wallet_path)
|
||||
.field("connect_descriptor", &"<redacted>")
|
||||
.field("username", &self.username)
|
||||
.field("password", &"<redacted>")
|
||||
.field("pool_min", &self.pool_min)
|
||||
.field("pool_max", &self.pool_max)
|
||||
.field("pool_timeout_secs", &self.pool_timeout_secs)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl PyOracleConfig {
|
||||
#[new]
|
||||
#[pyo3(signature = (
|
||||
password = None,
|
||||
username = None,
|
||||
connect_descriptor = None,
|
||||
wallet_path = None,
|
||||
pool_min = 1,
|
||||
pool_max = 16,
|
||||
pool_timeout_secs = 30,
|
||||
))]
|
||||
fn new(
|
||||
password: Option<String>,
|
||||
username: Option<String>,
|
||||
connect_descriptor: Option<String>,
|
||||
wallet_path: Option<String>,
|
||||
pool_min: usize,
|
||||
pool_max: usize,
|
||||
pool_timeout_secs: u64,
|
||||
) -> PyResult<Self> {
|
||||
if pool_min == 0 {
|
||||
return Err(pyo3::exceptions::PyValueError::new_err(
|
||||
"pool_min must be at least 1",
|
||||
));
|
||||
}
|
||||
if pool_max < pool_min {
|
||||
return Err(pyo3::exceptions::PyValueError::new_err(
|
||||
"pool_max must be >= pool_min",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(PyOracleConfig {
|
||||
wallet_path,
|
||||
connect_descriptor,
|
||||
username,
|
||||
password,
|
||||
pool_min,
|
||||
pool_max,
|
||||
pool_timeout_secs,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl PyOracleConfig {
|
||||
fn to_config_oracle(&self) -> config::OracleConfig {
|
||||
// Simple conversion - validation happens later in validate_oracle()
|
||||
config::OracleConfig {
|
||||
wallet_path: self.wallet_path.clone(),
|
||||
connect_descriptor: self.connect_descriptor.clone().unwrap_or_default(),
|
||||
username: self.username.clone().unwrap_or_default(),
|
||||
password: self.password.clone().unwrap_or_default(),
|
||||
pool_min: self.pool_min,
|
||||
pool_max: self.pool_max,
|
||||
pool_timeout_secs: self.pool_timeout_secs,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
struct Router {
|
||||
@@ -93,6 +200,9 @@ struct Router {
|
||||
chat_template: Option<String>,
|
||||
reasoning_parser: Option<String>,
|
||||
tool_call_parser: Option<String>,
|
||||
backend: BackendType,
|
||||
history_backend: HistoryBackendType,
|
||||
oracle_config: Option<PyOracleConfig>,
|
||||
}
|
||||
|
||||
impl Router {
|
||||
@@ -132,6 +242,10 @@ impl Router {
|
||||
RoutingMode::Regular {
|
||||
worker_urls: vec![],
|
||||
}
|
||||
} else if matches!(self.backend, BackendType::Openai) {
|
||||
RoutingMode::OpenAI {
|
||||
worker_urls: self.worker_urls.clone(),
|
||||
}
|
||||
} else if self.pd_disaggregation {
|
||||
RoutingMode::PrefillDecode {
|
||||
prefill_urls: self.prefill_urls.clone().unwrap_or_default(),
|
||||
@@ -170,6 +284,20 @@ impl Router {
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let history_backend = match self.history_backend {
|
||||
HistoryBackendType::Memory => config::HistoryBackend::Memory,
|
||||
HistoryBackendType::None => config::HistoryBackend::None,
|
||||
HistoryBackendType::Oracle => config::HistoryBackend::Oracle,
|
||||
};
|
||||
|
||||
let oracle = if matches!(self.history_backend, HistoryBackendType::Oracle) {
|
||||
self.oracle_config
|
||||
.as_ref()
|
||||
.map(|cfg| cfg.to_config_oracle())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(config::RouterConfig {
|
||||
mode,
|
||||
policy,
|
||||
@@ -218,8 +346,8 @@ impl Router {
|
||||
model_path: self.model_path.clone(),
|
||||
tokenizer_path: self.tokenizer_path.clone(),
|
||||
chat_template: self.chat_template.clone(),
|
||||
history_backend: config::HistoryBackend::Memory,
|
||||
oracle: None,
|
||||
history_backend,
|
||||
oracle,
|
||||
reasoning_parser: self.reasoning_parser.clone(),
|
||||
tool_call_parser: self.tool_call_parser.clone(),
|
||||
})
|
||||
@@ -289,6 +417,9 @@ impl Router {
|
||||
chat_template = None,
|
||||
reasoning_parser = None,
|
||||
tool_call_parser = None,
|
||||
backend = BackendType::Sglang,
|
||||
history_backend = HistoryBackendType::Memory,
|
||||
oracle_config = None,
|
||||
))]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn new(
|
||||
@@ -351,6 +482,9 @@ impl Router {
|
||||
chat_template: Option<String>,
|
||||
reasoning_parser: Option<String>,
|
||||
tool_call_parser: Option<String>,
|
||||
backend: BackendType,
|
||||
history_backend: HistoryBackendType,
|
||||
oracle_config: Option<PyOracleConfig>,
|
||||
) -> PyResult<Self> {
|
||||
let mut all_urls = worker_urls.clone();
|
||||
|
||||
@@ -427,6 +561,9 @@ impl Router {
|
||||
chat_template,
|
||||
reasoning_parser,
|
||||
tool_call_parser,
|
||||
backend,
|
||||
history_backend,
|
||||
oracle_config,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -491,6 +628,9 @@ impl Router {
|
||||
#[pymodule]
|
||||
fn sglang_router_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_class::<PolicyType>()?;
|
||||
m.add_class::<BackendType>()?;
|
||||
m.add_class::<HistoryBackendType>()?;
|
||||
m.add_class::<PyOracleConfig>()?;
|
||||
m.add_class::<Router>()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ use chrono::Utc;
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tracing::{info, warn};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use super::responses::build_stored_response;
|
||||
|
||||
@@ -958,14 +958,14 @@ async fn create_and_link_item(
|
||||
.await
|
||||
.map_err(|e| format!("Failed to link item: {}", e))?;
|
||||
|
||||
info!(
|
||||
debug!(
|
||||
conversation_id = %conv_id.0,
|
||||
item_id = %created.id.0,
|
||||
item_type = %created.item_type,
|
||||
"Persisted conversation item and link"
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
debug!(
|
||||
item_id = %created.id.0,
|
||||
item_type = %created.item_type,
|
||||
"Persisted conversation item (no conversation link)"
|
||||
|
||||
@@ -27,7 +27,7 @@ use std::{
|
||||
};
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::UnboundedReceiverStream;
|
||||
use tracing::{info, warn};
|
||||
use tracing::warn;
|
||||
|
||||
// Import from sibling modules
|
||||
use super::conversations::{
|
||||
@@ -197,6 +197,11 @@ impl OpenAIRouter {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
self.circuit_breaker.record_failure();
|
||||
tracing::error!(
|
||||
url = %url,
|
||||
error = %e,
|
||||
"Failed to forward request to OpenAI"
|
||||
);
|
||||
return (
|
||||
StatusCode::BAD_GATEWAY,
|
||||
format!("Failed to forward request to OpenAI: {}", e),
|
||||
@@ -518,12 +523,6 @@ impl crate::routers::RouterTrait for OpenAIRouter {
|
||||
) -> Response {
|
||||
let url = format!("{}/v1/responses", self.base_url);
|
||||
|
||||
info!(
|
||||
requested_store = body.store,
|
||||
is_streaming = body.stream,
|
||||
"openai_responses_request"
|
||||
);
|
||||
|
||||
// Validate mutually exclusive params: previous_response_id and conversation
|
||||
// TODO: this validation logic should move to the right place; we also need a proper error message module
|
||||
if body.previous_response_id.is_some() && body.conversation.is_some() {
|
||||
|
||||
@@ -132,19 +132,30 @@ impl AppContext {
|
||||
SharedResponseStorage,
|
||||
SharedConversationStorage,
|
||||
) = match router_config.history_backend {
|
||||
HistoryBackend::Memory => (
|
||||
Arc::new(MemoryResponseStorage::new()),
|
||||
Arc::new(MemoryConversationStorage::new()),
|
||||
),
|
||||
HistoryBackend::None => (
|
||||
Arc::new(NoOpResponseStorage::new()),
|
||||
Arc::new(NoOpConversationStorage::new()),
|
||||
),
|
||||
HistoryBackend::Memory => {
|
||||
info!("Initializing data connector: Memory");
|
||||
(
|
||||
Arc::new(MemoryResponseStorage::new()),
|
||||
Arc::new(MemoryConversationStorage::new()),
|
||||
)
|
||||
}
|
||||
HistoryBackend::None => {
|
||||
info!("Initializing data connector: None (no persistence)");
|
||||
(
|
||||
Arc::new(NoOpResponseStorage::new()),
|
||||
Arc::new(NoOpConversationStorage::new()),
|
||||
)
|
||||
}
|
||||
HistoryBackend::Oracle => {
|
||||
let oracle_cfg = router_config.oracle.clone().ok_or_else(|| {
|
||||
"oracle configuration is required when history_backend=oracle".to_string()
|
||||
})?;
|
||||
|
||||
info!(
|
||||
"Initializing data connector: Oracle ATP (pool: {}-{})",
|
||||
oracle_cfg.pool_min, oracle_cfg.pool_max
|
||||
);
|
||||
|
||||
let response_storage =
|
||||
OracleResponseStorage::new(oracle_cfg.clone()).map_err(|err| {
|
||||
format!("failed to initialize Oracle response storage: {err}")
|
||||
@@ -155,6 +166,7 @@ impl AppContext {
|
||||
format!("failed to initialize Oracle conversation storage: {err}")
|
||||
})?;
|
||||
|
||||
info!("Data connector initialized successfully: Oracle ATP");
|
||||
(Arc::new(response_storage), Arc::new(conversation_storage))
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user