// Source file: sglang/sgl-router/src/tool_parser/partial_json.rs (554 lines, 17 KiB, Rust)

use serde_json::{Map, Value};
use crate::tool_parser::{
errors::{ParserError, ParserResult},
traits::PartialJsonParser,
};
/// Parser for incomplete JSON.
///
/// Holds only configuration; every call to `parse_value` builds its own
/// short-lived parsing state, so one instance can be reused for many inputs.
#[derive(Debug, Clone)]
pub struct PartialJson {
    /// Maximum depth for nested structures
    max_depth: usize,
    /// Whether to allow incomplete values
    allow_incomplete: bool,
}
impl PartialJson {
/// Create a new partial JSON parser
pub fn new(max_depth: usize, allow_incomplete: bool) -> Self {
Self {
max_depth,
allow_incomplete,
}
}
/// Parse potentially incomplete JSON, returning parsed value and consumed bytes
///
/// # Arguments
/// * `input` - The JSON string to parse
/// * `allow_partial_strings` - When false, incomplete strings cause parsing to stop
/// (matches Python's Allow.ALL & ~Allow.STR behavior)
pub fn parse_value(
&self,
input: &str,
allow_partial_strings: bool,
) -> ParserResult<(Value, usize)> {
let mut parser = Parser::new(
input,
self.max_depth,
self.allow_incomplete,
allow_partial_strings,
);
let value = parser.parse_value(0)?;
Ok((value, parser.position))
}
}
impl Default for PartialJson {
fn default() -> Self {
Self::new(32, true)
}
}
impl PartialJsonParser for PartialJson {
    /// Parse `input`, accepting unterminated strings.
    fn parse(&self, input: &str) -> ParserResult<(Value, usize)> {
        // The trait entry point always allows partial strings.
        let allow_partial_strings = true;
        self.parse_value(input, allow_partial_strings)
    }

    /// Report whether `input` is accepted by `serde_json` as a full document.
    fn is_complete(&self, input: &str) -> bool {
        let strict: Result<Value, _> = serde_json::from_str(input);
        strict.is_ok()
    }

    /// Configured nesting limit.
    fn max_depth(&self) -> usize {
        self.max_depth
    }
}
/// Internal parser state
///
/// One instance is created per `PartialJson::parse_value` call and walks the
/// input once, front to back.
struct Parser<'a> {
/// Remaining input, with one-character lookahead.
chars: std::iter::Peekable<std::str::Chars<'a>>,
/// How much input has been consumed so far; updated only by `advance`.
position: usize,
/// Nesting limit; exceeding it yields `ParserError::DepthExceeded`.
max_depth: usize,
/// When true, truncated or unexpected input yields a best-effort value
/// instead of an error in most places.
allow_incomplete: bool,
/// When false, an unterminated string is an error even if
/// `allow_incomplete` is set.
allow_partial_strings: bool,
}
impl<'a> Parser<'a> {
fn new(
input: &'a str,
max_depth: usize,
allow_incomplete: bool,
allow_partial_strings: bool,
) -> Self {
Self {
chars: input.chars().peekable(),
position: 0,
max_depth,
allow_incomplete,
allow_partial_strings,
}
}
/// Look at the next character without consuming it (`None` at end of input).
fn peek(&mut self) -> Option<char> {
    let upcoming = self.chars.peek()?;
    Some(*upcoming)
}
/// Consume the next character, if any, and move `position` past it.
///
/// `position` is what `PartialJson::parse_value` hands back to callers as
/// the number of consumed bytes, so it must advance by the character's
/// UTF-8 width rather than by one; otherwise the offset is wrong (and
/// unusable for slicing the input) as soon as the text contains a
/// multi-byte character. For ASCII input the result is unchanged.
fn advance(&mut self) {
    if let Some(ch) = self.chars.next() {
        self.position += ch.len_utf8();
    }
}
/// Consume characters for as long as the next one satisfies
/// `char::is_whitespace`.
fn skip_whitespace(&mut self) {
    loop {
        match self.peek() {
            Some(ch) if ch.is_whitespace() => self.advance(),
            _ => return,
        }
    }
}
/// Parse any JSON value at the current position.
///
/// Skips leading whitespace and dispatches on the first character. Objects
/// and arrays are entered at `depth + 1`. When the first character starts
/// no known value (or the input has ended), nothing is consumed and the
/// result is `Value::Null` if `allow_incomplete` is set, otherwise an error.
///
/// # Errors
/// `DepthExceeded` when `depth` is above `max_depth`; `ParsingFailed` for an
/// unexpected character in strict mode; plus any error from the delegated
/// sub-parser.
fn parse_value(&mut self, depth: usize) -> ParserResult<Value> {
    if depth > self.max_depth {
        return Err(ParserError::DepthExceeded(self.max_depth));
    }
    self.skip_whitespace();

    match self.peek() {
        Some('{') => return self.parse_object(depth + 1),
        Some('[') => return self.parse_array(depth + 1),
        Some('"') => return self.parse_string(),
        Some('t') | Some('f') => return self.parse_bool(),
        Some('n') => return self.parse_null(),
        Some('-') | Some('0'..='9') => return self.parse_number(),
        _ => {}
    }

    // Nothing recognisable here: either tolerate it or report it.
    if !self.allow_incomplete {
        return Err(ParserError::ParsingFailed("Unexpected character".into()));
    }
    Ok(Value::Null)
}
/// Parse an object; the current character is expected to be `{`.
///
/// `depth` is the depth of this object itself (the caller has already
/// incremented it), so member values are parsed at the same `depth`.
/// A trailing comma before `}` is accepted.
///
/// With `allow_incomplete` set, the members collected so far are returned
/// as soon as something is missing or malformed:
/// * a key that fails to parse ends the object;
/// * a key without `:` is stored with a `null` value;
/// * a value that fails to parse is stored as `null`, or dropped entirely
///   when `allow_partial_strings` is false (matches Python's
///   `Allow.ALL & ~Allow.STR`).
///
/// NOTE: in that mode *any* error from a nested value is swallowed this
/// way, including `DepthExceeded` raised further down.
///
/// # Errors
/// `DepthExceeded` when `depth` is above `max_depth`; in strict mode,
/// `ParsingFailed` or whatever error a key/value parse produced.
fn parse_object(&mut self, depth: usize) -> ParserResult<Value> {
    if depth > self.max_depth {
        return Err(ParserError::DepthExceeded(self.max_depth));
    }

    // Step over '{' and any padding after it.
    self.advance();
    self.skip_whitespace();

    let mut fields = Map::new();
    if self.peek() == Some('}') {
        // Empty object.
        self.advance();
        return Ok(Value::Object(fields));
    }

    loop {
        let name = match self.parse_string() {
            Ok(Value::String(text)) => text,
            Ok(_) => return Err(ParserError::ParsingFailed("Expected string key".into())),
            // Truncated before/inside a key: keep what we have.
            Err(_) if self.allow_incomplete => return Ok(Value::Object(fields)),
            Err(err) => return Err(err),
        };

        self.skip_whitespace();
        if self.peek() != Some(':') {
            if !self.allow_incomplete {
                return Err(ParserError::ParsingFailed("Expected ':'".into()));
            }
            // Key seen but no separator yet: record it with a null value.
            fields.insert(name, Value::Null);
            return Ok(Value::Object(fields));
        }
        self.advance();
        self.skip_whitespace();

        // Same depth: it was already incremented when entering this object.
        match self.parse_value(depth) {
            Ok(parsed) => {
                fields.insert(name, parsed);
            }
            Err(_) if self.allow_incomplete => {
                // Without partial strings the unfinished pair is omitted
                // rather than reported as null.
                if self.allow_partial_strings {
                    fields.insert(name, Value::Null);
                }
                return Ok(Value::Object(fields));
            }
            Err(err) => return Err(err),
        }

        self.skip_whitespace();
        let closed = match self.peek() {
            Some(',') => {
                self.advance();
                self.skip_whitespace();
                // A '}' right after the comma is a tolerated trailing comma.
                self.peek() == Some('}')
            }
            Some('}') => true,
            _ if self.allow_incomplete => return Ok(Value::Object(fields)),
            _ => return Err(ParserError::ParsingFailed("Expected ',' or '}'".into())),
        };
        if closed {
            self.advance();
            return Ok(Value::Object(fields));
        }
    }
}
/// Parse an array; the current character is expected to be `[`.
///
/// `depth` is the depth of this array itself (the caller has already
/// incremented it), so elements are parsed at the same `depth`.
/// A trailing comma before `]` is accepted.
///
/// With `allow_incomplete` set, the elements collected so far are returned
/// when an element fails to parse (any error, including `DepthExceeded`
/// from deeper nesting) or when neither `,` nor `]` follows an element.
///
/// # Errors
/// `DepthExceeded` when `depth` is above `max_depth`; in strict mode,
/// `ParsingFailed` or whatever error an element parse produced.
fn parse_array(&mut self, depth: usize) -> ParserResult<Value> {
    if depth > self.max_depth {
        return Err(ParserError::DepthExceeded(self.max_depth));
    }

    // Step over '[' and any padding after it.
    self.advance();
    self.skip_whitespace();

    let mut items = Vec::new();
    if self.peek() == Some(']') {
        // Empty array.
        self.advance();
        return Ok(Value::Array(items));
    }

    loop {
        // Same depth: it was already incremented when entering this array.
        match self.parse_value(depth) {
            Ok(item) => items.push(item),
            Err(_) if self.allow_incomplete => return Ok(Value::Array(items)),
            Err(err) => return Err(err),
        }

        self.skip_whitespace();
        let closed = match self.peek() {
            Some(',') => {
                self.advance();
                self.skip_whitespace();
                // A ']' right after the comma is a tolerated trailing comma.
                self.peek() == Some(']')
            }
            Some(']') => true,
            _ if self.allow_incomplete => return Ok(Value::Array(items)),
            _ => return Err(ParserError::ParsingFailed("Expected ',' or ']'".into())),
        };
        if closed {
            self.advance();
            return Ok(Value::Array(items));
        }
    }
}
/// Parse a double-quoted string and return it as `Value::String`.
///
/// Recognised escapes are `\b \f \n \r \t` and `\u` (delegated to
/// `parse_unicode_escape`); any other escaped character, including
/// `"`, `\` and `/`, stands for itself. If the input ends before the
/// closing quote, the text gathered so far is returned only when both
/// `allow_incomplete` and `allow_partial_strings` are set.
///
/// # Errors
/// `ParsingFailed` when the current character is not `"`, when the string
/// is unterminated and partial strings are not permitted, or when a
/// unicode escape is rejected.
fn parse_string(&mut self) -> ParserResult<Value> {
    if self.peek() != Some('"') {
        return Err(ParserError::ParsingFailed("Expected '\"'".into()));
    }
    // Step over the opening quote.
    self.advance();

    let mut text = String::new();
    let mut pending_escape = false;
    loop {
        let current = match self.peek() {
            Some(ch) => ch,
            None => break,
        };

        if pending_escape {
            pending_escape = false;
            if current == 'u' {
                // Consume the 'u'; the helper consumes the hex digits itself,
                // so the shared advance at the bottom must be skipped.
                self.advance();
                let decoded = self.parse_unicode_escape()?;
                text.push(decoded);
                continue;
            }
            text.push(match current {
                'b' => '\u{0008}',
                'f' => '\u{000C}',
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                // Quote, backslash, slash and unknown escapes map to themselves.
                other => other,
            });
        } else {
            match current {
                '\\' => pending_escape = true,
                '"' => {
                    // Closing quote: consume it and finish.
                    self.advance();
                    return Ok(Value::String(text));
                }
                other => text.push(other),
            }
        }
        self.advance();
    }

    // Input ran out before the closing quote.
    if self.allow_incomplete && self.allow_partial_strings {
        Ok(Value::String(text))
    } else {
        Err(ParserError::ParsingFailed("Unterminated string".into()))
    }
}
/// Decode the payload of a `\u` escape; the `\u` itself is already consumed.
///
/// Reads four hex digits. JSON encodes characters outside the Basic
/// Multilingual Plane as a UTF-16 surrogate pair (`\uD83D\uDE00`), so when
/// the first unit is a high surrogate and a `\uXXXX` low surrogate follows
/// immediately, both escapes are consumed and combined into one character.
///
/// In `allow_incomplete` mode U+FFFD (replacement character) is returned
/// when fewer than four hex digits are available, or when a surrogate has
/// no valid partner (for example because the second half has not been
/// received yet). In that case only the first escape is consumed.
///
/// # Errors
/// In strict mode: `ParsingFailed("Incomplete unicode escape")` for fewer
/// than four hex digits, `ParsingFailed("Invalid unicode escape")` for an
/// unpaired surrogate.
fn parse_unicode_escape(&mut self) -> ParserResult<char> {
    let unit = match self.read_hex4() {
        Some(unit) => unit,
        None if self.allow_incomplete => return Ok('\u{FFFD}'),
        None => {
            return Err(ParserError::ParsingFailed(
                "Incomplete unicode escape".into(),
            ))
        }
    };

    // Every four-digit value except the surrogate range is a valid scalar.
    if let Some(ch) = char::from_u32(unit) {
        return Ok(ch);
    }

    if (0xD800..=0xDBFF).contains(&unit) {
        if let Some(low) = self.peek_low_surrogate() {
            // Commit the six characters of the trailing `\uXXXX`.
            for _ in 0..6 {
                self.advance();
            }
            let scalar = 0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00);
            if let Some(ch) = char::from_u32(scalar) {
                return Ok(ch);
            }
        }
    }

    // Unpaired (or not yet paired) surrogate.
    if self.allow_incomplete {
        Ok('\u{FFFD}')
    } else {
        Err(ParserError::ParsingFailed("Invalid unicode escape".into()))
    }
}

/// Consume up to four hex digits and return their value, or `None` if a
/// non-hex character or the end of input is reached first (digits read
/// before that point stay consumed).
fn read_hex4(&mut self) -> Option<u32> {
    let mut value = 0u32;
    for _ in 0..4 {
        let digit = self.peek().and_then(|ch| ch.to_digit(16))?;
        value = value * 16 + digit;
        self.advance();
    }
    Some(value)
}

/// Without consuming anything, check whether the upcoming input is a
/// complete `\uXXXX` escape in the low-surrogate range and return its value.
fn peek_low_surrogate(&self) -> Option<u32> {
    let mut lookahead = self.chars.clone();
    if lookahead.next() != Some('\\') || lookahead.next() != Some('u') {
        return None;
    }
    let mut value = 0u32;
    for _ in 0..4 {
        value = value * 16 + lookahead.next()?.to_digit(16)?;
    }
    if (0xDC00..=0xDFFF).contains(&value) {
        Some(value)
    } else {
        None
    }
}
/// Parse a JSON number at the current position.
///
/// Consumes an optional `-`, the integer digits (a leading `0` is taken on
/// its own), an optional fraction and an optional exponent, then converts
/// the collected text: `i64` first, then `u64` (so values above `i64::MAX`
/// keep full precision instead of being rounded through `f64`), then `f64`.
/// A float that `serde_json` cannot represent (non-finite) becomes `0`.
///
/// With `allow_incomplete` set, text that does not convert is handled
/// leniently: if only the exponent is unfinished (`1e`, `2.5E-`), the part
/// before the exponent is used; otherwise (for example a lone `-`) the
/// result is `0`.
///
/// # Errors
/// `ParsingFailed("Invalid number")` in strict mode when the collected
/// text is not a valid number.
fn parse_number(&mut self) -> ParserResult<Value> {
    /// Convert number text to a JSON number, preferring exact integers.
    fn to_json_number(text: &str) -> Option<Value> {
        if let Ok(n) = text.parse::<i64>() {
            return Some(Value::Number(serde_json::Number::from(n)));
        }
        if let Ok(n) = text.parse::<u64>() {
            return Some(Value::Number(serde_json::Number::from(n)));
        }
        let float = text.parse::<f64>().ok()?;
        Some(Value::Number(
            serde_json::Number::from_f64(float).unwrap_or_else(|| serde_json::Number::from(0)),
        ))
    }

    let mut number = String::new();

    // Sign.
    if self.peek() == Some('-') {
        number.push('-');
        self.advance();
    }

    // Integer part: a leading zero stands alone.
    if self.peek() == Some('0') {
        number.push('0');
        self.advance();
    } else {
        while let Some(digit) = self.peek().filter(|ch| ch.is_ascii_digit()) {
            number.push(digit);
            self.advance();
        }
    }

    // Fraction.
    if self.peek() == Some('.') {
        number.push('.');
        self.advance();
        while let Some(digit) = self.peek().filter(|ch| ch.is_ascii_digit()) {
            number.push(digit);
            self.advance();
        }
    }

    // Everything collected so far is ASCII, so this is a valid slice point.
    let mantissa_len = number.len();

    // Exponent.
    if let Some(marker) = self.peek() {
        if marker == 'e' || marker == 'E' {
            number.push(marker);
            self.advance();
            if let Some(sign) = self.peek() {
                if sign == '+' || sign == '-' {
                    number.push(sign);
                    self.advance();
                }
            }
            while let Some(digit) = self.peek().filter(|ch| ch.is_ascii_digit()) {
                number.push(digit);
                self.advance();
            }
        }
    }

    if let Some(value) = to_json_number(&number) {
        return Ok(value);
    }
    if !self.allow_incomplete {
        return Err(ParserError::ParsingFailed("Invalid number".into()));
    }

    // Truncated input: keep the mantissa if it is usable on its own,
    // otherwise fall back to zero.
    let fallback = to_json_number(&number[..mantissa_len])
        .unwrap_or_else(|| Value::Number(serde_json::Number::from(0)));
    Ok(fallback)
}
/// Parse `true` or `false`.
///
/// Works in two passes. First, up to five alphabetic characters are
/// inspected on a cloned iterator; nothing is consumed if they are neither
/// a full literal nor (with `allow_incomplete`) a prefix of one. Second,
/// the whole run of alphabetic characters is consumed and interpreted; in
/// `allow_incomplete` mode a prefix of `true`/`false` counts as that value.
///
/// # Errors
/// `ParsingFailed("Invalid boolean")` when either pass rejects the word.
fn parse_bool(&mut self) -> ParserResult<Value> {
    let lenient = self.allow_incomplete;

    // Pass 1: non-consuming look at the next few letters
    // ("false" is 5 chars, so five is enough).
    let mut probe = String::new();
    for ch in self.chars.clone() {
        if !ch.is_alphabetic() || probe.len() >= 5 {
            break;
        }
        probe.push(ch);
    }
    let plausible = probe == "true"
        || probe == "false"
        || (lenient
            && ("true".starts_with(probe.as_str()) || "false".starts_with(probe.as_str())));
    if !plausible {
        return Err(ParserError::ParsingFailed("Invalid boolean".into()));
    }

    // Pass 2: consume the full alphabetic run.
    let mut literal = String::new();
    while let Some(ch) = self.peek().filter(|c| c.is_alphabetic()) {
        literal.push(ch);
        self.advance();
    }

    let decoded = if literal == "true" {
        Some(true)
    } else if literal == "false" {
        Some(false)
    } else if lenient && "true".starts_with(literal.as_str()) {
        Some(true)
    } else if lenient && "false".starts_with(literal.as_str()) {
        Some(false)
    } else {
        None
    };

    decoded
        .map(Value::Bool)
        .ok_or_else(|| ParserError::ParsingFailed("Invalid boolean".into()))
}
/// Parse `null`.
///
/// Works in two passes. First, up to four alphabetic characters are
/// inspected on a cloned iterator; nothing is consumed if they are neither
/// `null` nor (with `allow_incomplete`) a prefix of it. Second, the whole
/// run of alphabetic characters is consumed and checked by the same rule.
///
/// # Errors
/// `ParsingFailed("Invalid null")` when either pass rejects the word.
fn parse_null(&mut self) -> ParserResult<Value> {
    const LITERAL: &str = "null";
    let lenient = self.allow_incomplete;
    // Exact literal always passes; a prefix passes only in lenient mode.
    let accepts =
        |candidate: &str| candidate == LITERAL || (lenient && LITERAL.starts_with(candidate));

    // Pass 1: non-consuming look at the next few letters.
    let mut probe = String::new();
    for ch in self.chars.clone() {
        if !ch.is_alphabetic() || probe.len() >= LITERAL.len() {
            break;
        }
        probe.push(ch);
    }
    if !accepts(&probe) {
        return Err(ParserError::ParsingFailed("Invalid null".into()));
    }

    // Pass 2: consume the full alphabetic run.
    let mut word = String::new();
    while let Some(ch) = self.peek().filter(|c| c.is_alphabetic()) {
        word.push(ch);
        self.advance();
    }

    if accepts(&word) {
        Ok(Value::Null)
    } else {
        Err(ParserError::ParsingFailed("Invalid null".into()))
    }
}
}
/// Utility function to check if a string contains complete JSON
pub fn is_complete_json(input: &str) -> bool {
serde_json::from_str::<Value>(input).is_ok()
}
/// Length of the common prefix of `s1` and `s2`, counted in characters
/// (not bytes).
pub fn find_common_prefix(s1: &str, s2: &str) -> usize {
    let mut right = s2.chars();
    let mut shared = 0;
    for left in s1.chars() {
        match right.next() {
            Some(ch) if ch == left => shared += 1,
            // Mismatch, or `s2` ran out first.
            _ => break,
        }
    }
    shared
}
/// Returns the part of `new` that follows its common prefix with `old`.
///
/// The prefix is measured in characters by `find_common_prefix`; the result
/// is empty when `new` has no characters beyond that prefix.
pub fn compute_diff(old: &str, new: &str) -> String {
    let shared_chars = find_common_prefix(old, new);
    // Translate the character count into the byte offset where the
    // differing tail of `new` begins.
    new.char_indices()
        .nth(shared_chars)
        .map(|(offset, _)| new[offset..].to_owned())
        .unwrap_or_default()
}