[router] remove old/outdated/useless comments across code base (#10968)

2025-09-26 13:48:50 -04:00
parent a7fe6e10a1
commit aae7ead2d0
56 changed files with 19 additions and 645 deletions
--- a/sgl-router/tests/api_endpoints_test.rs
+++ b/sgl-router/tests/api_endpoints_test.rs
@@ -576,7 +576,6 @@ mod model_info_tests {
        let ctx = TestContext::new(vec![]).await;
        let app = ctx.create_app().await;

-        // Test server info with no workers
        let req = Request::builder()
            .method("GET")
            .uri("/get_server_info")
@@ -593,7 +592,6 @@ mod model_info_tests {
            resp.status()
        );

-        // Test model info with no workers
        let req = Request::builder()
            .method("GET")
            .uri("/get_model_info")
@@ -610,7 +608,6 @@ mod model_info_tests {
            resp.status()
        );

-        // Test v1/models with no workers
        let req = Request::builder()
            .method("GET")
            .uri("/v1/models")
@@ -652,7 +649,6 @@ mod model_info_tests {

        let app = ctx.create_app().await;

-        // Test that model info is consistent across workers
        for _ in 0..5 {
            let req = Request::builder()
                .method("GET")
@@ -795,7 +791,6 @@ mod worker_management_tests {
        let resp = app.clone().oneshot(req).await.unwrap();
        assert_eq!(resp.status(), StatusCode::OK);

-        // Verify it's removed
        let req = Request::builder()
            .method("GET")
            .uri("/list_workers")
@@ -1302,7 +1297,6 @@ mod error_tests {

        let app = ctx.create_app().await;

-        // Test unknown endpoint
        let req = Request::builder()
            .method("GET")
            .uri("/unknown_endpoint")
@@ -1312,7 +1306,6 @@ mod error_tests {
        let resp = app.clone().oneshot(req).await.unwrap();
        assert_eq!(resp.status(), StatusCode::NOT_FOUND);

-        // Test POST to unknown endpoint
        let req = Request::builder()
            .method("POST")
            .uri("/api/v2/generate")
@@ -1606,7 +1599,6 @@ mod cache_tests {
            .unwrap();
        let body_json: serde_json::Value = serde_json::from_slice(&body).unwrap();

-        // Verify the response contains load information
        assert!(body_json.is_object());
        // The exact structure depends on the implementation
        // but should contain worker load information
@@ -1797,7 +1789,6 @@ mod request_id_tests {

        let app = ctx.create_app().await;

-        // Test 1: Request without any request ID header should generate one
        let payload = json!({
            "text": "Test request",
            "stream": false
@@ -1830,7 +1821,6 @@ mod request_id_tests {
            "Request ID should have content after prefix"
        );

-        // Test 2: Request with custom x-request-id should preserve it
        let custom_id = "custom-request-id-123";
        let req = Request::builder()
            .method("POST")
@@ -1847,7 +1837,6 @@ mod request_id_tests {
        assert!(response_id.is_some());
        assert_eq!(response_id.unwrap(), custom_id);

-        // Test 3: Different endpoints should have different prefixes
        let chat_payload = json!({
            "messages": [{"role": "user", "content": "Hello"}],
            "model": "test-model"
@@ -1871,7 +1860,6 @@ mod request_id_tests {
            .unwrap()
            .starts_with("chatcmpl-"));

-        // Test 4: Alternative request ID headers should be recognized
        let req = Request::builder()
            .method("POST")
            .uri("/generate")
@@ -1948,7 +1936,6 @@ mod request_id_tests {
            "stream": false
        });

-        // Test custom header is recognized
        let req = Request::builder()
            .method("POST")
            .uri("/generate")
@@ -2013,7 +2000,6 @@ mod rerank_tests {
            .unwrap();
        let body_json: serde_json::Value = serde_json::from_slice(&body).unwrap();

-        // Verify response structure
        assert!(body_json.get("results").is_some());
        assert!(body_json.get("model").is_some());
        assert_eq!(body_json["model"], "test-rerank-model");
@@ -2021,7 +2007,6 @@ mod rerank_tests {
        let results = body_json["results"].as_array().unwrap();
        assert_eq!(results.len(), 2);

-        // Verify results are sorted by score (highest first)
        assert!(results[0]["score"].as_f64().unwrap() >= results[1]["score"].as_f64().unwrap());

        ctx.shutdown().await;
@@ -2164,7 +2149,6 @@ mod rerank_tests {

        let app = ctx.create_app().await;

-        // Test V1 API format (simplified input)
        let payload = json!({
            "query": "machine learning algorithms",
            "documents": [
@@ -2189,7 +2173,6 @@ mod rerank_tests {
            .unwrap();
        let body_json: serde_json::Value = serde_json::from_slice(&body).unwrap();

-        // Verify response structure
        assert!(body_json.get("results").is_some());
        assert!(body_json.get("model").is_some());

@@ -2199,7 +2182,6 @@ mod rerank_tests {
        let results = body_json["results"].as_array().unwrap();
        assert_eq!(results.len(), 3); // All documents should be returned

-        // Verify results are sorted by score (highest first)
        assert!(results[0]["score"].as_f64().unwrap() >= results[1]["score"].as_f64().unwrap());
        assert!(results[1]["score"].as_f64().unwrap() >= results[2]["score"].as_f64().unwrap());

@@ -2224,7 +2206,6 @@ mod rerank_tests {

        let app = ctx.create_app().await;

-        // Test empty query string (validation should fail)
        let payload = json!({
            "query": "",
            "documents": ["Document 1", "Document 2"],
@@ -2241,7 +2222,6 @@ mod rerank_tests {
        let resp = app.clone().oneshot(req).await.unwrap();
        assert_eq!(resp.status(), StatusCode::BAD_REQUEST);

-        // Test query with only whitespace (validation should fail)
        let payload = json!({
            "query": "   ",
            "documents": ["Document 1", "Document 2"],
@@ -2258,7 +2238,6 @@ mod rerank_tests {
        let resp = app.clone().oneshot(req).await.unwrap();
        assert_eq!(resp.status(), StatusCode::BAD_REQUEST);

-        // Test empty documents list (validation should fail)
        let payload = json!({
            "query": "test query",
            "documents": [],
@@ -2275,7 +2254,6 @@ mod rerank_tests {
        let resp = app.clone().oneshot(req).await.unwrap();
        assert_eq!(resp.status(), StatusCode::BAD_REQUEST);

-        // Test invalid top_k (validation should fail)
        let payload = json!({
            "query": "test query",
            "documents": ["Document 1", "Document 2"],
--- a/sgl-router/tests/cache_aware_backward_compat_test.rs
+++ b/sgl-router/tests/cache_aware_backward_compat_test.rs
@@ -93,19 +93,16 @@ fn test_mixed_model_ids() {
    policy.add_worker(&worker3);
    policy.add_worker(&worker4);

-    // Test selection with default workers only
    let default_workers: Vec<Arc<dyn Worker>> =
        vec![Arc::new(worker1.clone()), Arc::new(worker3.clone())];
    let selected = policy.select_worker(&default_workers, Some("test request"));
    assert!(selected.is_some(), "Should select from default workers");

-    // Test selection with specific model workers only
    let llama_workers: Vec<Arc<dyn Worker>> =
        vec![Arc::new(worker2.clone()), Arc::new(worker4.clone())];
    let selected = policy.select_worker(&llama_workers, Some("test request"));
    assert!(selected.is_some(), "Should select from llama-3 workers");

-    // Test selection with mixed workers
    let all_workers: Vec<Arc<dyn Worker>> = vec![
        Arc::new(worker1.clone()),
        Arc::new(worker2.clone()),
@@ -144,7 +141,6 @@ fn test_remove_worker_by_url_backward_compat() {
    // Should remove from all trees since we don't know the model
    policy.remove_worker_by_url("http://worker1:8080");

-    // Verify removal worked
    let workers: Vec<Arc<dyn Worker>> = vec![Arc::new(worker2.clone())];
    let selected = policy.select_worker(&workers, Some("test"));
    assert_eq!(selected, Some(0), "Should only have worker2 left");
--- a/sgl-router/tests/chat_template_integration.rs
+++ b/sgl-router/tests/chat_template_integration.rs
@@ -89,7 +89,6 @@ fn test_chat_template_with_tokens() {

 #[test]
 fn test_llama_style_template() {
-    // Test a Llama-style chat template
    let template = r#"
 {%- if messages[0]['role'] == 'system' -%}
    {%- set system_message = messages[0]['content'] -%}
@@ -160,7 +159,6 @@ fn test_llama_style_template() {

 #[test]
 fn test_chatml_template() {
-    // Test a ChatML-style template
    let template = r#"
 {%- for message in messages %}
    {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }}
@@ -241,13 +239,11 @@ assistant:
        .map(|msg| serde_json::to_value(msg).unwrap())
        .collect();

-    // Test without generation prompt
    let result = processor
        .apply_chat_template(&json_messages, ChatTemplateParams::default())
        .unwrap();
    assert_eq!(result.trim(), "user: Test");

-    // Test with generation prompt
    let result_with_prompt = processor
        .apply_chat_template(
            &json_messages,
@@ -275,7 +271,6 @@ fn test_empty_messages_template() {

 #[test]
 fn test_content_format_detection() {
-    // Test string format detection
    let string_template = r#"
 {%- for message in messages -%}
 {{ message.role }}: {{ message.content }}
@@ -286,7 +281,6 @@ fn test_content_format_detection() {
        ChatTemplateContentFormat::String
    );

-    // Test OpenAI format detection
    let openai_template = r#"
 {%- for message in messages -%}
  {%- for content in message.content -%}
@@ -302,7 +296,6 @@ fn test_content_format_detection() {

 #[test]
 fn test_template_with_multimodal_content() {
-    // Test that multimodal messages work correctly when serialized to JSON
    let template = r#"
 {%- for message in messages %}
 {{ message.role }}:
--- a/sgl-router/tests/chat_template_loading.rs
+++ b/sgl-router/tests/chat_template_loading.rs
@@ -57,7 +57,6 @@ mod tests {
        )
        .unwrap();

-        // Test that the custom template is used
        let messages = vec![
            spec::ChatMessage::User {
                role: "user".to_string(),
@@ -89,7 +88,6 @@ mod tests {
            .apply_chat_template(&json_messages, params)
            .unwrap();

-        // Verify the custom template format
        assert!(result.contains("<|user|>Hello"));
        assert!(result.contains("<|assistant|>Hi there"));
        assert!(result.ends_with("<|assistant|>"));
--- a/sgl-router/tests/common/mock_mcp_server.rs
+++ b/sgl-router/tests/common/mock_mcp_server.rs
@@ -148,7 +148,6 @@ mod tests {
    async fn test_mock_server_with_rmcp_client() {
        let mut server = MockMCPServer::start().await.unwrap();

-        // Test that we can connect with rmcp client
        use rmcp::transport::StreamableHttpClientTransport;
        use rmcp::ServiceExt;

@@ -158,7 +157,6 @@ mod tests {
        assert!(client.is_ok(), "Should be able to connect to mock server");

        if let Ok(client) = client {
-            // Test listing tools
            let tools = client.peer().list_all_tools().await;
            assert!(tools.is_ok(), "Should be able to list tools");

--- a/sgl-router/tests/common/mod.rs
+++ b/sgl-router/tests/common/mod.rs
@@ -71,7 +71,6 @@ pub fn ensure_tokenizer_cached() -> PathBuf {

        let content = response.bytes().expect("Failed to read tokenizer content");

-        // Verify we got actual JSON content
        if content.len() < 100 {
            panic!("Downloaded content too small: {} bytes", content.len());
        }
--- a/sgl-router/tests/mcp_test.rs
+++ b/sgl-router/tests/mcp_test.rs
@@ -1,7 +1,6 @@
 // This test suite validates the complete MCP implementation against the
 // functionality required for SGLang responses API integration.
 //
-// Test Coverage:
 // - Core MCP server functionality
 // - Tool session management (individual and multi-tool)
 // - Tool execution and error handling
@@ -26,7 +25,6 @@ async fn create_mock_server() -> MockMCPServer {

 #[tokio::test]
 async fn test_mcp_server_initialization() {
-    // Test that we can create an empty configuration
    let config = McpConfig { servers: vec![] };

    // Should fail with no servers
@@ -329,7 +327,6 @@ async fn test_tool_info_structure() {

 #[tokio::test]
 async fn test_sse_connection() {
-    // Test with a non-existent command using STDIO to avoid retry delays
    // This tests that SSE configuration is properly handled even when connection fails
    let config = McpConfig {
        servers: vec![McpServerConfig {
@@ -351,8 +348,6 @@ async fn test_sse_connection() {

 #[tokio::test]
 async fn test_transport_types() {
-    // Test different transport configurations
-
    // HTTP/Streamable transport
    let http_config = McpServerConfig {
        name: "http_server".to_string(),
@@ -444,7 +439,6 @@ async fn test_complete_workflow() {
    // 7. Clean shutdown
    manager.shutdown().await;

-    // Verify all required capabilities for responses API integration
    let capabilities = [
        "MCP server initialization",
        "Tool server connection and discovery",
--- a/sgl-router/tests/policy_registry_integration.rs
+++ b/sgl-router/tests/policy_registry_integration.rs
@@ -20,8 +20,6 @@ async fn test_policy_registry_with_router_manager() {
    // Create RouterManager with shared registries
    let _router_manager = RouterManager::new(worker_registry.clone());

-    // Test adding workers with different models and policies
-
    // Add first worker for llama-3 with cache_aware policy hint
    let mut labels1 = HashMap::new();
    labels1.insert("policy".to_string(), "cache_aware".to_string());
@@ -44,7 +42,6 @@ async fn test_policy_registry_with_router_manager() {
    // This would normally connect to a real worker, but for testing we'll just verify the structure
    // In a real test, we'd need to mock the worker or use a test server

-    // Verify PolicyRegistry has the correct policy for llama-3
    let _llama_policy = policy_registry.get_policy("llama-3");
    // After first worker is added, llama-3 should have a policy

@@ -88,10 +85,8 @@ async fn test_policy_registry_with_router_manager() {
        chat_template: None,
    };

-    // Verify gpt-4 has random policy
    let _gpt_policy = policy_registry.get_policy("gpt-4");

-    // Test removing workers
    // When we remove both llama-3 workers, the policy should be cleaned up

    println!("PolicyRegistry integration test structure created");
@@ -113,7 +108,6 @@ fn test_policy_registry_cleanup() {
    let policy2 = registry.on_worker_added("model-1", Some("random"));
    assert_eq!(policy2.name(), "cache_aware"); // Should still be cache_aware

-    // Verify policy exists
    assert!(registry.get_policy("model-1").is_some());

    // Remove first worker - policy should remain
@@ -143,7 +137,6 @@ fn test_policy_registry_multiple_models() {
    assert_eq!(gpt_policy.name(), "random");
    assert_eq!(mistral_policy.name(), "round_robin"); // Default

-    // Verify all policies are stored
    assert!(registry.get_policy("llama-3").is_some());
    assert!(registry.get_policy("gpt-4").is_some());
    assert!(registry.get_policy("mistral").is_some());
--- a/sgl-router/tests/request_formats_test.rs
+++ b/sgl-router/tests/request_formats_test.rs
@@ -126,7 +126,6 @@ mod request_format_tests {
        }])
        .await;

-        // Test 1: Basic text request
        let payload = json!({
            "text": "Hello, world!",
            "stream": false
@@ -135,7 +134,6 @@ mod request_format_tests {
        let result = ctx.make_request("/generate", payload).await;
        assert!(result.is_ok());

-        // Test 2: Request with sampling parameters
        let payload = json!({
            "text": "Tell me a story",
            "sampling_params": {
@@ -149,7 +147,6 @@ mod request_format_tests {
        let result = ctx.make_request("/generate", payload).await;
        assert!(result.is_ok());

-        // Test 3: Request with input_ids
        let payload = json!({
            "input_ids": [1, 2, 3, 4, 5],
            "sampling_params": {
@@ -176,7 +173,6 @@ mod request_format_tests {
        }])
        .await;

-        // Test 1: Basic chat completion
        let payload = json!({
            "model": "test-model",
            "messages": [
@@ -197,7 +193,6 @@ mod request_format_tests {
            Some("chat.completion")
        );

-        // Test 2: Chat completion with parameters
        let payload = json!({
            "model": "test-model",
            "messages": [
@@ -226,7 +221,6 @@ mod request_format_tests {
        }])
        .await;

-        // Test 1: Basic completion
        let payload = json!({
            "model": "test-model",
            "prompt": "Once upon a time",
@@ -244,7 +238,6 @@ mod request_format_tests {
            Some("text_completion")
        );

-        // Test 2: Completion with array prompt
        let payload = json!({
            "model": "test-model",
            "prompt": ["First prompt", "Second prompt"],
@@ -255,7 +248,6 @@ mod request_format_tests {
        let result = ctx.make_request("/v1/completions", payload).await;
        assert!(result.is_ok());

-        // Test 3: Completion with logprobs
        let payload = json!({
            "model": "test-model",
            "prompt": "The capital of France is",
@@ -281,7 +273,6 @@ mod request_format_tests {
        }])
        .await;

-        // Test batch text generation
        let payload = json!({
            "text": ["First text", "Second text", "Third text"],
            "sampling_params": {
@@ -294,7 +285,6 @@ mod request_format_tests {
        let result = ctx.make_request("/generate", payload).await;
        assert!(result.is_ok());

-        // Test batch with input_ids
        let payload = json!({
            "input_ids": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
            "stream": false
@@ -317,7 +307,6 @@ mod request_format_tests {
        }])
        .await;

-        // Test with return_logprob
        let payload = json!({
            "text": "Test",
            "return_logprob": true,
@@ -327,7 +316,6 @@ mod request_format_tests {
        let result = ctx.make_request("/generate", payload).await;
        assert!(result.is_ok());

-        // Test with json_schema
        let payload = json!({
            "text": "Generate JSON",
            "sampling_params": {
@@ -340,7 +328,6 @@ mod request_format_tests {
        let result = ctx.make_request("/generate", payload).await;
        assert!(result.is_ok());

-        // Test with ignore_eos
        let payload = json!({
            "text": "Continue forever",
            "sampling_params": {
@@ -368,7 +355,6 @@ mod request_format_tests {
        }])
        .await;

-        // Test with empty body - should still work with mock worker
        let payload = json!({});

        let result = ctx.make_request("/generate", payload).await;
--- a/sgl-router/tests/responses_api_test.rs
+++ b/sgl-router/tests/responses_api_test.rs
@@ -44,7 +44,6 @@ fn test_responses_request_creation() {
        repetition_penalty: 1.0,
    };

-    // Test GenerationRequest trait implementation
    assert!(!request.is_stream());
    assert_eq!(request.get_model(), Some("test-model"));
    let routing_text = request.extract_text_for_routing();
@@ -139,7 +138,6 @@ fn test_usage_conversion() {
        8
    );

-    // Test reverse conversion
    let back_to_usage = response_usage.to_usage_info();
    assert_eq!(back_to_usage.prompt_tokens, 15);
    assert_eq!(back_to_usage.completion_tokens, 25);
@@ -152,7 +150,6 @@ fn test_reasoning_param_default() {
        effort: Some(ReasoningEffort::Medium),
    };

-    // Test JSON serialization/deserialization preserves default
    let json = serde_json::to_string(&param).unwrap();
    let parsed: ResponseReasoningParam = serde_json::from_str(&json).unwrap();

@@ -197,7 +194,6 @@ fn test_json_serialization() {
        repetition_penalty: 1.2,
    };

-    // Test that everything can be serialized to JSON and back
    let json = serde_json::to_string(&request).expect("Serialization should work");
    let parsed: ResponsesRequest =
        serde_json::from_str(&json).expect("Deserialization should work");
--- a/sgl-router/tests/streaming_tests.rs
+++ b/sgl-router/tests/streaming_tests.rs
@@ -197,7 +197,6 @@ mod streaming_tests {
        let events = result.unwrap();
        assert!(events.len() >= 2); // At least one chunk + [DONE]

-        // Verify events are valid JSON (except [DONE])
        for event in &events {
            if event != "[DONE]" {
                let parsed: Result<serde_json::Value, _> = serde_json::from_str(event);
@@ -329,7 +328,6 @@ mod streaming_tests {

    #[tokio::test]
    async fn test_sse_format_parsing() {
-        // Test SSE format parsing
        let parse_sse_chunk = |chunk: &[u8]| -> Vec<String> {
            let text = String::from_utf8_lossy(chunk);
            text.lines()
@@ -347,7 +345,6 @@ mod streaming_tests {
        assert_eq!(events[1], "{\"text\":\" world\"}");
        assert_eq!(events[2], "[DONE]");

-        // Test with mixed content
        let mixed = b"event: message\ndata: {\"test\":true}\n\n: comment\ndata: [DONE]\n\n";
        let events = parse_sse_chunk(mixed);

--- a/sgl-router/tests/test_openai_routing.rs
+++ b/sgl-router/tests/test_openai_routing.rs
@@ -84,8 +84,6 @@ fn create_minimal_completion_request() -> CompletionRequest {
    }
 }

-// ============= Basic Unit Tests =============
-
 /// Test basic OpenAI router creation and configuration
 #[tokio::test]
 async fn test_openai_router_creation() {
@@ -575,7 +573,6 @@ async fn test_unsupported_endpoints() {
    .await
    .unwrap();

-    // Test generate endpoint (SGLang-specific, should not be supported)
    let generate_request = GenerateRequest {
        prompt: None,
        text: Some("Hello world".to_string()),
@@ -593,7 +590,6 @@ async fn test_unsupported_endpoints() {
    let response = router.route_generate(None, &generate_request, None).await;
    assert_eq!(response.status(), StatusCode::NOT_IMPLEMENTED);

-    // Test completion endpoint (should also not be supported)
    let completion_request = create_minimal_completion_request();
    let response = router
        .route_completion(None, &completion_request, None)
@@ -601,8 +597,6 @@ async fn test_unsupported_endpoints() {
    assert_eq!(response.status(), StatusCode::NOT_IMPLEMENTED);
 }

-// ============= Mock Server E2E Tests =============
-
 /// Test chat completion with mock OpenAI server
 #[tokio::test]
 async fn test_openai_router_chat_completion_with_mock() {
@@ -635,7 +629,6 @@ async fn test_openai_router_chat_completion_with_mock() {
    let body_str = String::from_utf8(body_bytes.to_vec()).unwrap();
    let chat_response: serde_json::Value = serde_json::from_str(&body_str).unwrap();

-    // Verify it's a valid chat completion response
    assert_eq!(chat_response["object"], "chat.completion");
    assert_eq!(chat_response["model"], "gpt-3.5-turbo");
    assert!(!chat_response["choices"].as_array().unwrap().is_empty());
@@ -704,7 +697,6 @@ async fn test_openai_e2e_with_server() {
        .unwrap();
    let response_json: serde_json::Value = serde_json::from_slice(&body).unwrap();

-    // Verify the response structure
    assert_eq!(response_json["object"], "chat.completion");
    assert_eq!(response_json["model"], "gpt-3.5-turbo");
    assert!(!response_json["choices"].as_array().unwrap().is_empty());
--- a/sgl-router/tests/test_pd_routing.rs
+++ b/sgl-router/tests/test_pd_routing.rs
@@ -9,7 +9,6 @@ mod test_pd_routing {
    use sglang_router_rs::routers::http::pd_types::PDSelectionPolicy;
    use sglang_router_rs::routers::RouterFactory;

-    // Test-only struct to help validate PD request parsing
    #[derive(Debug)]
    struct PDRequest {
        pub is_stream: bool,
@@ -17,14 +16,12 @@ mod test_pd_routing {
    }

    impl PDRequest {
-        // Extract PD-relevant info from JSON for testing
        pub fn from_json(json: &serde_json::Value) -> Self {
            let is_stream = json
                .get("stream")
                .and_then(|v| v.as_bool())
                .unwrap_or(false);

-            // Detect batch size from text or input_ids
            let batch_size = if let Some(text) = json.get("text") {
                text.as_array().map(|arr| arr.len())
            } else if let Some(input_ids) = json.get("input_ids") {
@@ -40,15 +37,10 @@ mod test_pd_routing {
        }
    }

-    // ========================================================================
-    // Phase 1: Basic PD Components and Router Creation
-    // ========================================================================
-
    #[test]
    fn test_worker_types() {
        use sglang_router_rs::core::{BasicWorkerBuilder, Worker, WorkerType};

-        // Test worker creation for prefill servers
        let prefill_worker: Box<dyn Worker> = Box::new(
            BasicWorkerBuilder::new("http://prefill:8080")
                .worker_type(WorkerType::Prefill {
@@ -65,7 +57,6 @@ mod test_pd_routing {
            _ => panic!("Expected Prefill worker type"),
        }

-        // Test worker creation for decode servers
        let decode_worker: Box<dyn Worker> = Box::new(
            BasicWorkerBuilder::new("http://decode:8080")
                .worker_type(WorkerType::Decode)
@@ -78,7 +69,6 @@ mod test_pd_routing {
            _ => panic!("Expected Decode worker type"),
        }

-        // Test regular worker creation
        let regular_worker: Box<dyn Worker> = Box::new(
            BasicWorkerBuilder::new("http://regular:8080")
                .worker_type(WorkerType::Regular)
@@ -94,7 +84,6 @@ mod test_pd_routing {

    #[test]
    fn test_pd_selection_policies() {
-        // Test all PD selection policy variants
        // Note: These policies are only used when pd_disaggregation=true
        let policies = vec![
            PDSelectionPolicy::Random,
@@ -107,7 +96,6 @@ mod test_pd_routing {
        ];

        for policy in policies {
-            // Verify each policy can be created and matched
            match &policy {
                PDSelectionPolicy::Random => {
                    assert!(matches!(policy, PDSelectionPolicy::Random));
@@ -126,7 +114,6 @@ mod test_pd_routing {

    #[tokio::test]
    async fn test_pd_router_configuration() {
-        // Test PD router configuration with various policies
        // In the new structure, RoutingMode and PolicyConfig are separate
        let test_cases = vec![
            (
@@ -221,7 +208,6 @@ mod test_pd_routing {
                "Router creation should succeed with empty worker"
            );

-            // Verify that no workers are registered since we didn't initialize them
            let stats = app_context.worker_registry.stats();
            assert_eq!(
                stats.total_workers, 0,
@@ -230,13 +216,8 @@ mod test_pd_routing {
        }
    }

-    // ========================================================================
-    // Phase 2: Bootstrap Injection and Request Handling
-    // ========================================================================
-
    #[test]
    fn test_pd_request_from_json() {
-        // Test PDRequest parsing from single text request
        let single_json = json!({
            "text": "Hello world",
            "stream": false,
@@ -248,7 +229,6 @@ mod test_pd_routing {
        assert!(!pd_req.is_stream);
        assert_eq!(pd_req.batch_size, None);

-        // Test PDRequest parsing from batch text request
        let batch_json = json!({
            "text": ["Hello", "World", "Test"],
            "stream": true,
@@ -259,7 +239,6 @@ mod test_pd_routing {
        assert!(pd_req.is_stream);
        assert_eq!(pd_req.batch_size, Some(3));

-        // Test PDRequest parsing from input_ids request
        let ids_json = json!({
            "input_ids": [[1, 2, 3], [4, 5, 6]],
            "stream": false
@@ -269,7 +248,6 @@ mod test_pd_routing {
        assert!(!pd_req.is_stream);
        assert_eq!(pd_req.batch_size, Some(2));

-        // Test PDRequest parsing from chat request
        let chat_json = json!({
            "messages": [
                {"role": "system", "content": "You are a helpful assistant"},
@@ -288,14 +266,12 @@ mod test_pd_routing {
        // Since we can't test the actual inject_bootstrap_fields function here
        // (it's private in the router module), we'll test the expected behavior

-        // Simulate bootstrap injection for single request
        let mut single_json = json!({
            "text": "Hello world",
            "stream": false,
            "temperature": 0.7
        });

-        // Create a prefill worker to simulate injection
        let prefill_worker: Box<dyn Worker> = Box::new(
            BasicWorkerBuilder::new("http://prefill1:8080")
                .worker_type(WorkerType::Prefill {
@@ -305,24 +281,20 @@ mod test_pd_routing {
                .build(),
        );

-        // Extract bootstrap port from worker type
        let bootstrap_port = match prefill_worker.worker_type() {
            WorkerType::Prefill { bootstrap_port } => bootstrap_port,
            _ => None,
        };

-        // Simulate what inject_bootstrap_fields would do
        single_json["bootstrap_host"] = json!(get_hostname(prefill_worker.url()));
        single_json["bootstrap_port"] = json!(bootstrap_port);
        single_json["bootstrap_room"] = json!(12345u64); // Random room ID

-        // Verify bootstrap fields are added correctly
        assert_eq!(single_json["bootstrap_host"], "prefill1");
        assert_eq!(single_json["bootstrap_port"], json!(Some(9000)));
        assert!(single_json["bootstrap_room"].is_u64());
        assert_eq!(single_json["temperature"], 0.7); // Original field preserved

-        // Simulate bootstrap injection for batch request
        let mut batch_json = json!({
            "text": ["Hello", "World", "Test"],
            "stream": true
@@ -334,7 +306,6 @@ mod test_pd_routing {
        batch_json["bootstrap_port"] = json!(vec![bootstrap_port; batch_size]);
        batch_json["bootstrap_room"] = json!(vec![111u64, 222u64, 333u64]);

-        // Verify batch bootstrap fields
        assert!(batch_json["bootstrap_host"].is_array());
        assert_eq!(
            batch_json["bootstrap_host"].as_array().unwrap().len(),
@@ -347,7 +318,6 @@ mod test_pd_routing {

    #[test]
    fn test_request_serialization() {
-        // Test that requests can be properly serialized and deserialized
        let request = json!({
            "text": "Test prompt",
            "stream": false,
@@ -360,13 +330,10 @@ mod test_pd_routing {
            "bootstrap_room": 12345u64
        });

-        // Convert to bytes (as would happen in the router)
        let bytes = serde_json::to_vec(&request).unwrap();

-        // Parse back from bytes
        let parsed: serde_json::Value = serde_json::from_slice(&bytes).unwrap();

-        // Verify all fields are preserved
        assert_eq!(parsed["text"], "Test prompt");
        assert_eq!(parsed["stream"], false);
        assert_eq!(parsed["temperature"], 0.7);
@@ -378,7 +345,6 @@ mod test_pd_routing {

    #[test]
    fn test_hostname_extraction() {
-        // Test various URL formats
        let test_cases = vec![
            ("http://localhost:8080", "localhost"),
            ("http://10.0.0.1:8080", "10.0.0.1"),
@@ -395,13 +361,11 @@ mod test_pd_routing {

    #[test]
    fn test_pd_request_edge_cases() {
-        // Test empty request
        let empty_json = json!({});
        let pd_req = PDRequest::from_json(&empty_json);
        assert!(!pd_req.is_stream);
        assert_eq!(pd_req.batch_size, None);

-        // Test request with only stream field
        let stream_only = json!({
            "stream": true
        });
@@ -409,14 +373,12 @@ mod test_pd_routing {
        assert!(pd_req.is_stream);
        assert_eq!(pd_req.batch_size, None);

-        // Test request with empty text array
        let empty_batch = json!({
            "text": []
        });
        let pd_req = PDRequest::from_json(&empty_batch);
        assert_eq!(pd_req.batch_size, Some(0));

-        // Test request with non-array text (should be None)
        let non_array_text = json!({
            "text": "single string"
        });
@@ -424,29 +386,21 @@ mod test_pd_routing {
        assert_eq!(pd_req.batch_size, None);
    }

-    // ========================================================================
-    // Phase 2: Background Load Monitoring Tests
-    // ========================================================================
-
    #[tokio::test]
    async fn test_background_load_monitoring() {
        use std::collections::HashMap;
        use tokio::sync::watch;

-        // Create a watch channel for testing
        let (tx, rx) = watch::channel(HashMap::new());

-        // Simulate load updates
        let mut loads = HashMap::new();
        loads.insert("http://prefill1:8080".to_string(), 10);
        loads.insert("http://prefill2:8080".to_string(), 20);
        loads.insert("http://decode1:8080".to_string(), 5);
        loads.insert("http://decode2:8080".to_string(), 15);

-        // Send the loads
        tx.send(loads.clone()).unwrap();

-        // Verify receiver gets the update
        let received_loads = rx.borrow();
        assert_eq!(received_loads.get("http://prefill1:8080"), Some(&10));
        assert_eq!(received_loads.get("http://prefill2:8080"), Some(&20));
@@ -456,7 +410,6 @@ mod test_pd_routing {

    #[test]
    fn test_load_monitoring_configuration() {
-        // Test that load monitoring is only enabled for PowerOfTwo policy
        let policies = vec![
            (PDSelectionPolicy::Random, false),
            (PDSelectionPolicy::PowerOfTwo, true),
@@ -483,42 +436,31 @@ mod test_pd_routing {
        use std::collections::HashMap;
        use tokio::sync::watch;

-        // Test watch channel's broadcast behavior
        let (tx, rx1) = watch::channel(HashMap::new());
        let rx2 = rx1.clone();

-        // Initial state - empty map
        assert!(rx1.borrow().is_empty());
        assert!(rx2.borrow().is_empty());

-        // Update 1
        let mut loads = HashMap::new();
        loads.insert("worker1".to_string(), 10);
        tx.send(loads.clone()).unwrap();

-        // Both receivers see the update
        assert_eq!(rx1.borrow().get("worker1"), Some(&10));
        assert_eq!(rx2.borrow().get("worker1"), Some(&10));

-        // Update 2 - overwrites previous
        loads.insert("worker1".to_string(), 20);
        loads.insert("worker2".to_string(), 30);
        tx.send(loads).unwrap();

-        // Both receivers see the latest state
        assert_eq!(rx1.borrow().get("worker1"), Some(&20));
        assert_eq!(rx2.borrow().get("worker2"), Some(&30));
    }

-    // ========================================================================
-    // Tests based on bench_one_batch_server.py patterns
-    // ========================================================================
-
    #[test]
    fn test_generate_request_formats() {
        // Based on bench_one_batch_server.py request patterns

-        // Test 1: Batch request with input_ids (most common in benchmarks)
        let batch_request = json!({
            "input_ids": [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
            "sampling_params": {
@@ -534,7 +476,6 @@ mod test_pd_routing {
        assert!(pd_req.is_stream);
        assert_eq!(pd_req.batch_size, Some(3));

-        // Test 2: Request with return_logprob (critical for PD)
        let logprob_request = json!({
            "input_ids": [[1, 2, 3]],
            "sampling_params": {
@@ -548,7 +489,6 @@ mod test_pd_routing {
        assert_eq!(logprob_request["return_logprob"], true);
        assert_eq!(logprob_request["stream"], false);

-        // Test 3: Large batch sizes from benchmark
        let batch_sizes = vec![1, 16, 64]; // From bench_one_batch_server.py
        for bs in batch_sizes {
            let request = json!({
@@ -567,7 +507,6 @@ mod test_pd_routing {

    #[test]
    fn test_sampling_params_handling() {
-        // Test various sampling parameters from bench_one_batch_server.py
        let sampling_params_variations = vec![
            json!({
                "temperature": 0.0,
@@ -595,14 +534,12 @@ mod test_pd_routing {
                "stream": false
            });

-            // Verify params are preserved
            assert_eq!(request["sampling_params"], params);
        }
    }

    #[test]
    fn test_streaming_response_parsing() {
-        // Test SSE format parsing from streaming responses
        let sse_chunks = ["data: {\"text\":\"Hello\",\"meta_info\":{\"completion_tokens\":1,\"finish_reason\":null}}",
            "data: {\"text\":\" world\",\"meta_info\":{\"completion_tokens\":2,\"finish_reason\":null}}",
            "data: {\"text\":\"!\",\"meta_info\":{\"completion_tokens\":3,\"finish_reason\":{\"type\":\"length\"}}}",
@@ -615,13 +552,11 @@ mod test_pd_routing {
            assert!(parsed["meta_info"]["completion_tokens"].is_u64());
        }

-        // Test [DONE] detection
        assert_eq!(sse_chunks[3], "data: [DONE]");
    }

    #[test]
    fn test_ttft_calculation() {
-        // Test Time To First Token calculation pattern
        let first_token_response = json!({
            "text": "Hello",
            "meta_info": {
@@ -637,7 +572,6 @@ mod test_pd_routing {

    #[test]
    fn test_throughput_metrics() {
-        // Test throughput calculation patterns from bench_one_batch_server.py
        let batch_size = 16;
        let input_len = 1024;
        let output_len = 16;
@@ -655,7 +589,6 @@ mod test_pd_routing {

    #[test]
    fn test_error_response_handling() {
-        // Test error response format from bench_one_batch_server.py
        let error_response = json!({
            "error": "Request has failed. Invalid input format."
        });
@@ -666,7 +599,6 @@ mod test_pd_routing {

    #[test]
    fn test_structured_output_request() {
-        // Test structured output format (json_schema)
        let structured_request = json!({
            "text": "What is the capital of France? Answer in JSON.",
            "sampling_params": {
@@ -687,7 +619,6 @@ mod test_pd_routing {
    fn test_bootstrap_injection_with_benchmark_requests() {
        use sglang_router_rs::core::{BasicWorkerBuilder, Worker, WorkerType};

-        // Test bootstrap injection with actual benchmark request patterns
        let mut benchmark_request = json!({
            "input_ids": vec![vec![1, 2, 3, 4]; 16], // Batch size 16
            "sampling_params": {
@@ -699,7 +630,6 @@ mod test_pd_routing {
            "stream": true
        });

-        // Create a prefill worker to simulate injection
        let prefill_worker: Box<dyn Worker> = Box::new(
            BasicWorkerBuilder::new("http://prefill:8080")
                .worker_type(WorkerType::Prefill {
@@ -709,7 +639,6 @@ mod test_pd_routing {
                .build(),
        );

-        // Extract bootstrap port from worker type
        let bootstrap_port = match prefill_worker.worker_type() {
            WorkerType::Prefill { bootstrap_port } => bootstrap_port,
            _ => None,
@@ -722,7 +651,6 @@ mod test_pd_routing {
        benchmark_request["bootstrap_room"] =
            json!((0..batch_size).map(|_| 12345u64).collect::<Vec<_>>());

-        // Verify bootstrap fields match batch size
        assert_eq!(
            benchmark_request["bootstrap_host"]
                .as_array()
@@ -745,14 +673,12 @@ mod test_pd_routing {
            batch_size
        );

-        // Verify original fields are preserved
        assert_eq!(benchmark_request["return_logprob"], true);
        assert_eq!(benchmark_request["stream"], true);
    }

    #[test]
    fn test_server_info_response_format() {
-        // Test server info format expected by bench_one_batch_server.py
        let server_info = json!({
            "internal_states": [{
                "avg_spec_accept_length": 3.5,
@@ -769,16 +695,13 @@ mod test_pd_routing {
            ]
        });

-        // Verify structure matches what benchmark expects
        assert!(server_info["internal_states"][0]["avg_spec_accept_length"].is_f64());
        assert!(server_info["internal_states"][0]["last_gen_throughput"].is_f64());
        assert!(server_info["prefill"].is_array());
        assert!(server_info["decode"].is_array());
    }

-    // ========================================================================
    // Comprehensive Endpoint Coverage Test
-    // ========================================================================

    #[test]
    fn test_pd_endpoints_coverage() {
@@ -807,7 +730,6 @@ mod test_pd_routing {
        assert_eq!(implemented_count, 10);
        assert_eq!(total_count, 11);

-        // Document the missing endpoint
        let missing: Vec<_> = implemented_endpoints
            .iter()
            .filter(|(_, _, impl_status)| !impl_status)
@@ -819,14 +741,12 @@ mod test_pd_routing {

    #[test]
    fn test_large_batch_bootstrap_injection() {
-        // Test bootstrap injection performance with very large batches
        // This simulates the bench_one_batch_server.py scenario
        let large_batch_sizes = vec![1024, 4096, 8192];

        for batch_size in large_batch_sizes {
            let start = std::time::Instant::now();

-            // Simulate a large batch request
            let mut large_batch_request = json!({
                "input_ids": vec![vec![1, 2, 3, 4]; batch_size],
                "sampling_params": {
@@ -836,7 +756,6 @@ mod test_pd_routing {
                "stream": true
            });

-            // Create a prefill worker to simulate injection
            let prefill_worker: Box<dyn Worker> = Box::new(
                BasicWorkerBuilder::new("http://prefill:8080")
                    .worker_type(WorkerType::Prefill {
@@ -846,7 +765,6 @@ mod test_pd_routing {
                    .build(),
            );

-            // Extract bootstrap port from worker type
            let bootstrap_port = match prefill_worker.worker_type() {
                WorkerType::Prefill { bootstrap_port } => bootstrap_port,
                _ => None,
@@ -861,7 +779,6 @@ mod test_pd_routing {

            let elapsed = start.elapsed();

-            // Verify bootstrap fields are correctly sized
            assert_eq!(
                large_batch_request["bootstrap_host"]
                    .as_array()
@@ -899,7 +816,6 @@ mod test_pd_routing {

    #[test]
    fn test_payload_size_calculation() {
-        // Test payload size estimation for bench_one_batch_server.py scenarios
        let test_cases = vec![
            (1, 1024, 16),   // Small batch
            (16, 1024, 16),  // Medium batch
@@ -937,14 +853,12 @@ mod test_pd_routing {

    #[test]
    fn test_policy_type_to_pd_selection_policy_mapping() {
-        // Test that PDSelectionPolicy doesn't include RoundRobin
        let pd_policy_count = 3; // Random, PowerOfTwo, CacheAware
        assert_eq!(
            pd_policy_count, 3,
            "PDSelectionPolicy should have exactly 3 variants"
        );

-        // Verify that each PDSelectionPolicy variant can be created
        let _random = PDSelectionPolicy::Random;
        let _po2 = PDSelectionPolicy::PowerOfTwo;
        let _cache_aware = PDSelectionPolicy::CacheAware {
--- a/sgl-router/tests/tokenizer_integration.rs
+++ b/sgl-router/tests/tokenizer_integration.rs
@@ -84,7 +84,6 @@ fn test_sequence_operations() {
    for prompt in TEST_PROMPTS.iter() {
        let encoding = tokenizer.encode(prompt).expect("Failed to encode prompt");

-        // Test Sequence with append_text
        let mut sequence = Sequence::new(tokenizer.clone());
        sequence.append_text(prompt).expect("Failed to append text");

@@ -95,7 +94,6 @@ fn test_sequence_operations() {
        );
        assert_eq!(sequence.text().unwrap(), *prompt, "Sequence text mismatch");

-        // Test incremental decoding with append_token
        let mut decoder = Sequence::new(tokenizer.clone());
        let mut output = String::new();

@@ -178,7 +176,6 @@ fn test_stop_sequence_decoder() {
            .expect("Failed to load tokenizer"),
    );

-    // Test with various stop sequences
    let test_cases = vec![
        (
            "Hello world! Stop here. Continue after.",
@@ -237,7 +234,6 @@ fn test_stop_sequence_decoder() {

 #[test]
 fn test_factory_creation() {
-    // Test factory creation method
    let tokenizer_path = ensure_tokenizer_cached();
    let tokenizer = factory::create_tokenizer(tokenizer_path.to_str().unwrap())
        .expect("Failed to create tokenizer via factory");
--- a/sgl-router/tests/tool_parser_deepseek.rs
+++ b/sgl-router/tests/tool_parser_deepseek.rs
@@ -6,7 +6,6 @@ use sglang_router_rs::tool_parser::{DeepSeekParser, ParseState, StreamResult, To
 async fn test_deepseek_complete_parsing() {
    let parser = DeepSeekParser::new();

-    // Test single tool call
    let input = r#"Let me help you with that.
 <｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>get_weather
 ```json
@@ -18,7 +17,6 @@ The weather in Tokyo is..."#;
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].function.name, "get_weather");

-    // Verify arguments
    let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
    assert_eq!(args["location"], "Tokyo");
    assert_eq!(args["units"], "celsius");
--- a/sgl-router/tests/tool_parser_edge_cases.rs
+++ b/sgl-router/tests/tool_parser_edge_cases.rs
@@ -167,8 +167,6 @@ async fn test_unicode_edge_cases() {

 #[tokio::test]
 async fn test_nested_brackets_in_strings() {
-    // Test that parsers correctly handle brackets within string literals
-
    let mistral_parser = MistralParser::new();
    let input = r#"[TOOL_CALLS] [{"name": "echo", "arguments": {"text": "Array: [1, 2, 3]"}}]"#;
    let result = mistral_parser.parse_complete(input).await.unwrap();
@@ -186,8 +184,6 @@ async fn test_nested_brackets_in_strings() {

 #[tokio::test]
 async fn test_multiple_formats_in_text() {
-    // Test that parsers don't get confused by other formats in the text
-
    let json_parser = JsonParser::new();
    let input = r#"
    Here's some text with [TOOL_CALLS] that shouldn't trigger.
@@ -272,7 +268,6 @@ async fn test_partial_token_at_buffer_boundary() {
    let parser = QwenParser::new();
    let mut state = ParseState::new();

-    // Test case that would fail with the bug:
    // Send exactly "<tool" which is a 5-character prefix of "<tool_call>\n"
    let result = parser.parse_incremental("<tool", &mut state).await.unwrap();
    assert!(matches!(result, StreamResult::Incomplete));
@@ -303,7 +298,6 @@ async fn test_partial_token_at_buffer_boundary() {
 async fn test_exact_prefix_lengths() {
    let parser = QwenParser::new();

-    // Test various exact prefix lengths that would be missed by exclusive range
    let test_cases = vec![
        ("<", 1),            // 1-char prefix
        ("<t", 2),           // 2-char prefix
--- a/sgl-router/tests/tool_parser_glm4_moe.rs
+++ b/sgl-router/tests/tool_parser_glm4_moe.rs
@@ -6,7 +6,6 @@ use sglang_router_rs::tool_parser::{Glm4MoeParser, ParseState, StreamResult, Too
 async fn test_glm4_complete_parsing() {
    let parser = Glm4MoeParser::new();

-    // Test single tool call
    let input = r#"Let me search for that.
 <tool_call>get_weather
 <arg_key>city</arg_key>
@@ -20,7 +19,6 @@ The weather will be..."#;
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].function.name, "get_weather");

-    // Verify arguments
    let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
    assert_eq!(args["city"], "Beijing");
    assert_eq!(args["date"], "2024-12-25");
@@ -51,7 +49,6 @@ async fn test_glm4_multiple_tools() {
 async fn test_glm4_type_conversion() {
    let parser = Glm4MoeParser::new();

-    // Test various value types
    let input = r#"<tool_call>process
 <arg_key>count</arg_key>
 <arg_value>42</arg_value>
@@ -132,7 +129,6 @@ fn test_glm4_format_detection() {
 async fn test_glm4_python_literal_values() {
    let parser = Glm4MoeParser::new();

-    // Test Python-style boolean values
    let input = r#"<tool_call>config
 <arg_key>debug</arg_key>
 <arg_value>True</arg_value>
--- a/sgl-router/tests/tool_parser_gpt_oss.rs
+++ b/sgl-router/tests/tool_parser_gpt_oss.rs
@@ -6,7 +6,6 @@ use sglang_router_rs::tool_parser::{GptOssParser, ParseState, StreamResult, Tool
 async fn test_gpt_oss_complete_parsing() {
    let parser = GptOssParser::new();

-    // Test single tool call
    let input = r#"Let me search for that information.
 <|channel|>commentary to=functions.search<|constrain|>json<|message|>{"query": "rust programming", "limit": 10}<|call|>
 Here are the results..."#;
@@ -15,7 +14,6 @@ Here are the results..."#;
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].function.name, "search");

-    // Verify arguments
    let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
    assert_eq!(args["query"], "rust programming");
    assert_eq!(args["limit"], 10);
@@ -38,7 +36,6 @@ async fn test_gpt_oss_multiple_tools() {
 async fn test_gpt_oss_with_namespace() {
    let parser = GptOssParser::new();

-    // Test with different namespace patterns
    let input = r#"<|channel|>commentary to=api.users.create<|constrain|>json<|message|>{"name": "John", "email": "john@example.com"}<|call|>
 <|channel|>commentary to=tools.calculator.add<|constrain|>json<|message|>{"x": 10, "y": 20}<|call|>"#;

@@ -52,7 +49,6 @@ async fn test_gpt_oss_with_namespace() {
 async fn test_gpt_oss_with_assistant_prefix() {
    let parser = GptOssParser::new();

-    // Test with <|start|>assistant prefix
    let input = r#"<|start|>assistant<|channel|>commentary to=functions.test<|constrain|>json<|message|>{"key": "value"}<|call|>"#;

    let result = parser.parse_complete(input).await.unwrap();
@@ -64,7 +60,6 @@ async fn test_gpt_oss_with_assistant_prefix() {
 async fn test_gpt_oss_empty_args() {
    let parser = GptOssParser::new();

-    // Test with empty arguments
    let input =
        r#"<|channel|>commentary to=functions.get_time<|constrain|>json<|message|>{}<|call|>"#;

@@ -130,7 +125,6 @@ fn test_gpt_oss_format_detection() {
 async fn test_gpt_oss_with_whitespace() {
    let parser = GptOssParser::new();

-    // Test with whitespace after function name
    let input = r#"<|channel|>commentary to=functions.test  <|constrain|>json<|message|>{"key": "value"}<|call|>"#;

    let result = parser.parse_complete(input).await.unwrap();
@@ -142,7 +136,6 @@ async fn test_gpt_oss_with_whitespace() {
 async fn test_gpt_oss_complex_json() {
    let parser = GptOssParser::new();

-    // Test with complex nested JSON
    let input = r#"<|channel|>commentary to=functions.process<|constrain|>json<|message|>{
    "nested": {
        "data": [1, 2, 3],
--- a/sgl-router/tests/tool_parser_kimik2.rs
+++ b/sgl-router/tests/tool_parser_kimik2.rs
@@ -6,7 +6,6 @@ use sglang_router_rs::tool_parser::{KimiK2Parser, ParseState, StreamResult, Tool
 async fn test_kimik2_complete_parsing() {
    let parser = KimiK2Parser::new();

-    // Test single tool call
    let input = r#"Let me help you with that.
 <|tool_calls_section_begin|>
 <|tool_call_begin|>functions.get_weather:0<|tool_call_argument_begin|>{"location": "Tokyo", "units": "celsius"}<|tool_call_end|>
@@ -17,7 +16,6 @@ The weather in Tokyo is..."#;
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].function.name, "get_weather");

-    // Verify arguments
    let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
    assert_eq!(args["location"], "Tokyo");
    assert_eq!(args["units"], "celsius");
@@ -42,7 +40,6 @@ async fn test_kimik2_multiple_tools() {
 async fn test_kimik2_with_whitespace() {
    let parser = KimiK2Parser::new();

-    // Test with extra whitespace
    let input = r#"<|tool_calls_section_begin|>
 <|tool_call_begin|> functions.test:0 <|tool_call_argument_begin|> {"key": "value", "num": 42} <|tool_call_end|>
 <|tool_calls_section_end|>"#;
@@ -114,7 +111,6 @@ fn test_kimik2_format_detection() {
 async fn test_kimik2_sequential_indices() {
    let parser = KimiK2Parser::new();

-    // Test with proper sequential indexing
    let input = r#"<|tool_calls_section_begin|>
 <|tool_call_begin|>functions.first:0<|tool_call_argument_begin|>{"param": "a"}<|tool_call_end|>
 <|tool_call_begin|>functions.second:1<|tool_call_argument_begin|>{"param": "b"}<|tool_call_end|>
--- a/sgl-router/tests/tool_parser_llama.rs
+++ b/sgl-router/tests/tool_parser_llama.rs
@@ -116,7 +116,6 @@ async fn test_llama_real_world_output() {
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].function.name, "web_search");

-    // Test with nicely formatted JSON
    let formatted_input = r#"<|python_tag|>{
    "name": "get_current_time",
    "arguments": {
@@ -144,7 +143,6 @@ async fn test_llama_json_array_format() {

 #[tokio::test]
 async fn test_single_json() {
-    // Test parsing plain JSON without python_tag
    let parser = LlamaParser::new();
    let text = r#"{"name": "get_weather", "arguments": {"city": "Paris"}}"#;

@@ -158,7 +156,6 @@ async fn test_single_json() {

 #[tokio::test]
 async fn test_multiple_json_with_separator() {
-    // Test multiple JSON objects with semicolon separator
    let parser = LlamaParser::new();
    let text = r#"<|python_tag|>{"name": "get_weather", "arguments": {"city": "Paris"}};{"name": "get_tourist_attractions", "arguments": {"city": "Paris"}}"#;

@@ -170,7 +167,6 @@ async fn test_multiple_json_with_separator() {

 #[tokio::test]
 async fn test_multiple_json_with_separator_customized() {
-    // Test multiple JSON objects with python_tag repeated
    let parser = LlamaParser::new();
    let text = r#"<|python_tag|>{"name": "get_weather", "arguments": {}}<|python_tag|>{"name": "get_tourist_attractions", "arguments": {}}"#;

@@ -182,7 +178,6 @@ async fn test_multiple_json_with_separator_customized() {

 #[tokio::test]
 async fn test_json_with_trailing_text() {
-    // Test JSON with trailing text after
    let parser = LlamaParser::new();
    let text = r#"{"name": "get_weather", "arguments": {}} Some follow-up text"#;

@@ -193,7 +188,6 @@ async fn test_json_with_trailing_text() {

 #[tokio::test]
 async fn test_invalid_then_valid_json() {
-    // Test error recovery - invalid JSON followed by valid JSON
    let parser = LlamaParser::new();
    let text = r#"{"name": "get_weather", "arguments": {{"name": "get_weather", "arguments": {}}"#;

@@ -206,7 +200,6 @@ async fn test_invalid_then_valid_json() {

 #[tokio::test]
 async fn test_plain_text_only() {
-    // Test plain text with no tool calls
    let parser = LlamaParser::new();
    let text = "This is just plain explanation text.";

@@ -216,7 +209,6 @@ async fn test_plain_text_only() {

 #[tokio::test]
 async fn test_with_python_tag_prefix() {
-    // Test text before python_tag
    let parser = LlamaParser::new();
    let text = r#"Some intro. <|python_tag|>{"name": "get_weather", "arguments": {}}"#;

@@ -225,9 +217,7 @@ async fn test_with_python_tag_prefix() {
    assert_eq!(result[0].function.name, "get_weather");
 }

-// ============================================================================
 // STREAMING TESTS
-// ============================================================================

 #[tokio::test]
 async fn test_llama_streaming_simple() {
@@ -332,7 +322,6 @@ async fn test_llama_streaming_with_text_before() {

 #[tokio::test]
 async fn test_llama_streaming_multiple_tools() {
-    // Test streaming multiple tool calls with semicolon separator
    let parser = LlamaParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -361,7 +350,6 @@ async fn test_llama_streaming_multiple_tools() {

 #[tokio::test]
 async fn test_llama_streaming_multiple_tools_chunked() {
-    // Test streaming multiple tool calls arriving in chunks
    let parser = LlamaParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

--- a/sgl-router/tests/tool_parser_mixed_edge_cases.rs
+++ b/sgl-router/tests/tool_parser_mixed_edge_cases.rs
@@ -10,8 +10,6 @@ use sglang_router_rs::tool_parser::{

 #[tokio::test]
 async fn test_mixed_formats_in_text() {
-    // Test that parsers correctly ignore other formats' markers
-
    let json_parser = JsonParser::new();
    let input = r#"
    Some text with [TOOL_CALLS] marker that shouldn't trigger.
@@ -37,8 +35,6 @@ async fn test_mixed_formats_in_text() {

 #[tokio::test]
 async fn test_format_markers_in_string_content() {
-    // Test that format markers inside string content don't interfere
-
    let pythonic_parser = PythonicParser::new();
    let input = r#"[echo(text="Use [TOOL_CALLS] and <tool_call> in text")]"#;

@@ -101,7 +97,6 @@ async fn test_multiple_sequential_calls_different_formats() {
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].function.name, "call1");

-    // Test plain JSON separately
    let input2 = r#"{"name": "call2", "arguments": {"x": 1}}"#;
    let result2 = llama_parser.parse_complete(input2).await.unwrap();
    assert_eq!(result2.len(), 1);
@@ -133,7 +128,6 @@ async fn test_empty_and_whitespace_variations() {
 async fn test_special_json_values() {
    let json_parser = JsonParser::new();

-    // Test various special JSON values
    let input = r#"{
        "name": "test_special",
        "arguments": {
@@ -183,8 +177,6 @@ async fn test_parser_recovery_after_invalid_input() {

 #[tokio::test]
 async fn test_boundary_cases_for_extraction() {
-    // Test edge cases in JSON extraction from text
-
    let json_parser = JsonParser::new();

    // JSON at the very beginning
@@ -259,7 +251,6 @@ async fn test_mistral_with_pretty_json() {
 async fn test_qwen_with_cdata_like_content() {
    let parser = QwenParser::new();

-    // Test with content that looks like CDATA but isn't
    // Note: QwenParser expects exactly "<tool_call>\n" with the newline
    let input = r#"<tool_call>
 {"name": "process", "arguments": {"xml": "<![CDATA[some data]]>"}}
--- a/sgl-router/tests/tool_parser_pythonic.rs
+++ b/sgl-router/tests/tool_parser_pythonic.rs
@@ -180,7 +180,6 @@ These functions will provide the information you need."#;
 async fn test_pythonic_nested_brackets_in_lists() {
    let parser = PythonicParser::new();

-    // Test nested brackets within list arguments
    let input = r#"[process_matrix(data=[[1, 2], [3, 4]], labels=["row[0]", "row[1]"])]"#;

    let result = parser.parse_complete(input).await.unwrap();
@@ -196,7 +195,6 @@ async fn test_pythonic_nested_brackets_in_lists() {
 async fn test_pythonic_nested_brackets_in_dicts() {
    let parser = PythonicParser::new();

-    // Test nested brackets within dictionary arguments
    let input =
        r#"[analyze(config={"patterns": ["[a-z]+", "[0-9]+"], "nested": {"list": [1, [2, 3]]}})]"#;

@@ -213,7 +211,6 @@ async fn test_pythonic_nested_brackets_in_dicts() {
 async fn test_pythonic_mixed_quotes() {
    let parser = PythonicParser::new();

-    // Test mixed quote types in arguments
    let input = r#"[format_text(single='Hello', double="World", mixed="It's \"quoted\"")]"#;

    let result = parser.parse_complete(input).await.unwrap();
@@ -230,7 +227,6 @@ async fn test_pythonic_mixed_quotes() {
 async fn test_pythonic_complex_nesting() {
    let parser = PythonicParser::new();

-    // Test complex nested structures
    let input = r#"[transform(
        matrix=[[1, [2, 3]], [4, [5, [6, 7]]]],
        operations=[{"type": "scale", "factor": [2, 3]}, {"type": "rotate", "angle": 90}],
@@ -250,7 +246,6 @@ async fn test_pythonic_complex_nesting() {

 #[tokio::test]
 async fn test_parse_streaming_no_brackets() {
-    // Test parsing text with no brackets (no tool calls)
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -268,7 +263,6 @@ async fn test_parse_streaming_no_brackets() {

 #[tokio::test]
 async fn test_parse_streaming_complete_tool_call() {
-    // Test parsing a complete tool call
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -289,7 +283,6 @@ async fn test_parse_streaming_complete_tool_call() {

 #[tokio::test]
 async fn test_parse_streaming_text_before_tool_call() {
-    // Test parsing text that appears before a tool call
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -308,7 +301,6 @@ async fn test_parse_streaming_text_before_tool_call() {

 #[tokio::test]
 async fn test_parse_streaming_partial_tool_call() {
-    // Test parsing a partial tool call that spans multiple chunks
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -340,7 +332,6 @@ async fn test_parse_streaming_partial_tool_call() {

 #[tokio::test]
 async fn test_parse_streaming_bracket_without_text_before() {
-    // Test parsing a tool call that starts at the beginning of the text
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -359,7 +350,6 @@ async fn test_parse_streaming_bracket_without_text_before() {

 #[tokio::test]
 async fn test_parse_streaming_text_after_tool_call() {
-    // Test parsing text that appears after a tool call
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -379,7 +369,6 @@ async fn test_parse_streaming_text_after_tool_call() {

 #[tokio::test]
 async fn test_parse_streaming_multiple_tool_calls() {
-    // Test parsing multiple tool calls in sequence
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -401,7 +390,6 @@ async fn test_parse_streaming_multiple_tool_calls() {

 #[tokio::test]
 async fn test_parse_streaming_opening_bracket_only() {
-    // Test parsing text with only an opening bracket but no closing bracket
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -418,7 +406,6 @@ async fn test_parse_streaming_opening_bracket_only() {

 #[tokio::test]
 async fn test_parse_streaming_nested_brackets() {
-    // Test parsing tool calls with nested brackets in arguments
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -439,7 +426,6 @@ async fn test_parse_streaming_nested_brackets() {

 #[tokio::test]
 async fn test_parse_streaming_nested_brackets_dict() {
-    // Test parsing tool calls with nested dictionaries and lists
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -460,7 +446,6 @@ async fn test_parse_streaming_nested_brackets_dict() {

 #[tokio::test]
 async fn test_parse_streaming_multiple_tools_with_nested_brackets() {
-    // Test parsing multiple tool calls with nested brackets
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -480,7 +465,6 @@ async fn test_parse_streaming_multiple_tools_with_nested_brackets() {

 #[tokio::test]
 async fn test_parse_streaming_partial_nested_brackets() {
-    // Test parsing partial tool calls with nested brackets across chunks
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -514,7 +498,6 @@ async fn test_parse_streaming_partial_nested_brackets() {

 #[tokio::test]
 async fn test_parse_streaming_with_python_start_and_end_token() {
-    // Test parsing a message that starts with <|python_start|> and <|python_end|> across chunks
    let parser = PythonicParser::new();
    let mut state = sglang_router_rs::tool_parser::ParseState::new();

@@ -544,7 +527,6 @@ async fn test_parse_streaming_with_python_start_and_end_token() {

 #[tokio::test]
 async fn test_detect_and_parse_with_python_start_and_end_token() {
-    // Test parsing a message that starts with <|python_start|> and contains a valid tool call
    let parser = PythonicParser::new();

    let text = "User wants to get the weather in Mars. <|python_start|>[get_weather(location='Mars', unit='celsius')]<|python_end|> In this way we will get the weather in Mars.";
--- a/sgl-router/tests/tool_parser_qwen.rs
+++ b/sgl-router/tests/tool_parser_qwen.rs
@@ -189,7 +189,6 @@ async fn test_buffer_drain_optimization() {
    // First chunk - incomplete tool call
    let chunk1 = "<tool_call>\n{\"name\": \"test1\", ";
    let _result = parser.parse_incremental(chunk1, &mut state).await.unwrap();
-    // Phase 2 simplified streaming might not handle partial JSON correctly
    // The important thing is buffer accumulation works
    assert!(!state.buffer.is_empty());

@@ -197,32 +196,23 @@ async fn test_buffer_drain_optimization() {
    let chunk2 = "\"arguments\": {}}\n</tool_call><tool_call>\n{\"name\": \"test2\", ";
    let result = parser.parse_incremental(chunk2, &mut state).await.unwrap();

-    match result {
-        StreamResult::ToolComplete(tool) => {
-            assert_eq!(tool.function.name, "test1");
-            // After consuming the first tool, buffer should contain only the second tool start
-            assert!(state.buffer.starts_with("<tool_call>"));
-            assert!(state.buffer.contains("test2"));
-        }
-        _ => {
-            // Phase 2 simplified streaming might return Incomplete
-            // The important thing is the buffer is managed correctly
-        }
+    if let StreamResult::ToolComplete(tool) = result {
+        assert_eq!(tool.function.name, "test1");
+        // After consuming the first tool, buffer should contain only the second tool start
+        assert!(state.buffer.starts_with("<tool_call>"));
+        assert!(state.buffer.contains("test2"));
+    } else {
+        // The important thing is the buffer is managed correctly
    }

    // Complete the second tool
    let chunk3 = "\"arguments\": {\"x\": 1}}\n</tool_call>";
    let result = parser.parse_incremental(chunk3, &mut state).await.unwrap();

-    match result {
-        StreamResult::ToolComplete(tool) => {
-            assert_eq!(tool.function.name, "test2");
-            // Buffer should be empty after consuming all tools
-            assert!(state.buffer.is_empty() || !state.buffer.contains("</tool_call>"));
-        }
-        _ => {
-            // Phase 2 simplified streaming might handle this differently
-        }
+    if let StreamResult::ToolComplete(tool) = result {
+        assert_eq!(tool.function.name, "test2");
+        // Buffer should be empty after consuming all tools
+        assert!(state.buffer.is_empty() || !state.buffer.contains("</tool_call>"));
    }
 }

@@ -253,7 +243,4 @@ async fn test_buffer_efficiency_with_multiple_tools() {
            // Simplified streaming might return Incomplete
        }
    }
-
-    // Verify no memory issues or panics occurred with drain()
-    // Test passes if we reach this point without panic
 }
--- a/sgl-router/tests/tool_parser_registry.rs
+++ b/sgl-router/tests/tool_parser_registry.rs
@@ -126,7 +126,6 @@ async fn test_unknown_model_fallback() {
 async fn test_pattern_specificity() {
    let registry = ParserRegistry::new();

-    // Test that more specific patterns take precedence
    // llama-4* should match before llama-*
    let parser = registry.get_parser("llama-4-70b").unwrap();
    assert!(parser.detect_format(r#"[test_function(x=1)]"#)); // Pythonic format
@@ -139,7 +138,6 @@ async fn test_pattern_specificity() {
 async fn test_real_world_model_outputs() {
    let registry = ParserRegistry::new();

-    // Test with realistic outputs from different models
    let test_cases = vec![
        (
            "gpt-4",
--- a/sgl-router/tests/tool_parser_step3.rs
+++ b/sgl-router/tests/tool_parser_step3.rs
@@ -6,7 +6,6 @@ use sglang_router_rs::tool_parser::{ParseState, Step3Parser, StreamResult, ToolP
 async fn test_step3_complete_parsing() {
    let parser = Step3Parser::new();

-    // Test single tool call
    let input = r#"Let me help you.
 <｜tool_calls_begin｜>
 <｜tool_call_begin｜>function<｜tool_sep｜><steptml:invoke name="search">
@@ -20,7 +19,6 @@ Here are the results..."#;
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].function.name, "search");

-    // Verify arguments
    let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
    assert_eq!(args["query"], "rust programming");
    assert_eq!(args["limit"], 10);
@@ -127,7 +125,6 @@ fn test_step3_format_detection() {
 async fn test_step3_nested_steptml() {
    let parser = Step3Parser::new();

-    // Test with complex parameter values
    let input = r#"<｜tool_calls_begin｜>
 <｜tool_call_begin｜>function<｜tool_sep｜><steptml:invoke name="config">
 <steptml:parameter name="settings">{"nested": {"key": "value"}}</steptml:parameter>
@@ -148,7 +145,6 @@ async fn test_step3_nested_steptml() {
 async fn test_step3_python_literals() {
    let parser = Step3Parser::new();

-    // Test Python-style literals
    let input = r#"<｜tool_calls_begin｜>
 <｜tool_call_begin｜>function<｜tool_sep｜><steptml:invoke name="test">
 <steptml:parameter name="bool_true">True</steptml:parameter>
@@ -211,7 +207,6 @@ async fn test_json_parameter_values() {
 async fn test_step3_parameter_with_angle_brackets() {
    let parser = Step3Parser::new();

-    // Test parameter value containing < character
    let input = r#"<｜tool_calls_begin｜>
 <｜tool_call_begin｜>function<｜tool_sep｜><steptml:invoke name="compare">
 <steptml:parameter name="expression">a < b && b > c</steptml:parameter>
@@ -223,7 +218,6 @@ async fn test_step3_parameter_with_angle_brackets() {
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].function.name, "compare");

-    // Verify the parameter value was parsed correctly
    let args: serde_json::Value = serde_json::from_str(&result[0].function.arguments).unwrap();
    assert_eq!(args["expression"], "a < b && b > c");
    assert_eq!(args["context"], "comparison test");
@@ -233,7 +227,6 @@ async fn test_step3_parameter_with_angle_brackets() {
 async fn test_step3_empty_function_name() {
    let parser = Step3Parser::new();

-    // Test empty function name
    let input = r#"<｜tool_calls_begin｜>
 <｜tool_call_begin｜>function<｜tool_sep｜><steptml:invoke name="">
 <steptml:parameter name="param">value</steptml:parameter>
--- a/sgl-router/tests/tool_parser_streaming.rs
+++ b/sgl-router/tests/tool_parser_streaming.rs
@@ -12,8 +12,6 @@ async fn test_json_streaming_simple() {
    let parser = JsonParser::new();
    let mut state = ParseState::new();

-    // Phase 2 note: This test sends the full JSON at once in the last chunk
-    // In real streaming, chunks would be smaller
    let full_json = r#"{"name": "get_weather", "arguments": {"location": "San Francisco"}}"#;

    let result = parser
@@ -21,7 +19,6 @@ async fn test_json_streaming_simple() {
        .await
        .unwrap();

-    // With complete JSON sent at once, we should get ToolComplete
    match result {
        StreamResult::ToolComplete(tool) => {
            assert_eq!(tool.function.name, "get_weather");
@@ -37,7 +34,6 @@ async fn test_json_streaming_array() {
    let parser = JsonParser::new();
    let mut state = ParseState::new();

-    // Stream a JSON array of tools
    let chunks = vec![
        r#"["#,
        r#"{"name": "tool1", "#,
@@ -57,7 +53,6 @@ async fn test_json_streaming_array() {
    }

    // Current implementation may handle this differently
-    // We're mainly testing that it doesn't crash
    assert!(tool_count <= 2, "Should parse at most 2 tools");
 }

@@ -95,7 +90,6 @@ async fn test_pythonic_streaming() {
    let parser = PythonicParser::new();
    let mut state = ParseState::new();

-    // Send complete pythonic format at once
    let full_input = r#"[get_weather(city="London", units="celsius")]"#;

    let result = parser
@@ -149,7 +143,6 @@ async fn test_qwen_streaming() {
    let parser = QwenParser::new();
    let mut state = ParseState::new();

-    // Send complete Qwen format at once (with exact format expected by parser)
    // Note: Parser expects newline after both tags
    let full_input = "<tool_call>\n{\"name\": \"translate\", \"arguments\": {\"text\": \"hello\", \"to\": \"zh\"}}\n</tool_call>";

@@ -176,12 +169,10 @@ async fn test_streaming_incomplete_stays_incomplete() {
    let parser = JsonParser::new();
    let mut state = ParseState::new();

-    // Send truly incomplete JSON that can't be auto-completed
    let chunks = vec![r#"{"na"#, r#"me": "#];

    for chunk in chunks {
        let result = parser.parse_incremental(chunk, &mut state).await.unwrap();
-        // Should return Incomplete for partial JSON that can't be auto-completed
        assert!(
            matches!(result, StreamResult::Incomplete),
            "Should return Incomplete for partial JSON, got: {:?}",
@@ -189,7 +180,6 @@ async fn test_streaming_incomplete_stays_incomplete() {
        );
    }

-    // Buffer should contain the accumulated incomplete JSON
    assert!(!state.buffer.is_empty());
 }

@@ -198,8 +188,6 @@ async fn test_streaming_with_text_before_tool() {
    let parser = JsonParser::new();
    let mut state = ParseState::new();

-    // For streaming, the parser expects clean JSON
-    // Mixed text extraction only works in parse_complete, not parse_incremental
    let full_input = r#"{"name": "test", "arguments": {}}"#;

    let result = parser
@@ -221,10 +209,8 @@ async fn test_streaming_with_text_before_tool() {
 async fn test_streaming_buffer_accumulation() {
    let parser = JsonParser::new();

-    // Test: Complete JSON should clear buffer after parsing
    let mut state = ParseState::new();

-    // Send partial JSON that can't be interpreted as complete
    let result1 = parser
        .parse_incremental(r#"{"na"#, &mut state)
        .await
@@ -236,7 +222,6 @@ async fn test_streaming_buffer_accumulation() {
        "Buffer should accumulate incomplete JSON"
    );

-    // Send rest of JSON
    let result2 = parser
        .parse_incremental(r#"me": "test", "arguments": {}}"#, &mut state)
        .await
@@ -262,7 +247,6 @@ async fn test_streaming_multiple_tools_sequential() {
    let parser = QwenParser::new();
    let mut state = ParseState::new();

-    // Send complete Qwen format with newlines
    let full_input = r#"<tool_call>
 {"name": "tool1", "arguments": {}}
 </tool_call>"#;
@@ -286,13 +270,11 @@ async fn test_streaming_multiple_tools_sequential() {
 async fn test_streaming_reset_after_error() {
    let parser = JsonParser::new();

-    // First attempt with invalid JSON
    let mut state1 = ParseState::new();
    let _ = parser
        .parse_incremental(r#"{"name": invalid}"#, &mut state1)
        .await;

-    // Second attempt with valid JSON should work with fresh state
    let mut state2 = ParseState::new();
    let result = parser
        .parse_incremental(r#"{"name": "test", "arguments": {}}"#, &mut state2)
@@ -309,7 +291,6 @@ async fn test_streaming_with_unicode_chunks() {
    let parser = JsonParser::new();
    let mut state = ParseState::new();

-    // Send complete JSON with unicode
    let full_input = r#"{"name": "translate", "arguments": {"text": "Hello 世界 🌍"}}"#;

    let result = parser
@@ -317,8 +298,6 @@ async fn test_streaming_with_unicode_chunks() {
        .await
        .unwrap();

-    // Phase 2 may return partial results even with complete JSON
-    // The important thing is that unicode is handled without crashes
    match result {
        StreamResult::ToolComplete(tool) => {
            assert_eq!(tool.function.name, "translate");
@@ -327,10 +306,8 @@ async fn test_streaming_with_unicode_chunks() {
        }
        StreamResult::ToolName { name, .. } => {
            assert_eq!(name, "translate");
-            // Phase 2 partial streaming behavior - acceptable
        }
        StreamResult::ToolArguments { arguments, .. } => {
-            // Verify unicode was preserved
            let args: serde_json::Value = serde_json::from_str(&arguments).unwrap();
            assert!(args["text"].as_str().unwrap().contains("世界"));
        }
--- a/sgl-router/tests/tool_parser_wrapper_tokens.rs
+++ b/sgl-router/tests/tool_parser_wrapper_tokens.rs
@@ -25,20 +25,17 @@ async fn test_json_with_xml_style_wrapper() {

 #[tokio::test]
 async fn test_json_with_multiple_wrapper_pairs() {
-    // Test with multiple start/end token pairs
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec!["<tool>".to_string(), "<<TOOL>>".to_string()],
        end_tokens: vec!["</tool>".to_string(), "<</TOOL>>".to_string()],
        separator: ", ".to_string(),
    });

-    // Test first pair
    let input1 = r#"<tool>{"name": "tool1", "arguments": {}}</tool>"#;
    let result1 = parser.parse_complete(input1).await.unwrap();
    assert_eq!(result1.len(), 1);
    assert_eq!(result1[0].function.name, "tool1");

-    // Test second pair
    let input2 = r#"<<TOOL>>{"name": "tool2", "arguments": {}}<</TOOL>>"#;
    let result2 = parser.parse_complete(input2).await.unwrap();
    assert_eq!(result2.len(), 1);
@@ -47,7 +44,6 @@ async fn test_json_with_multiple_wrapper_pairs() {

 #[tokio::test]
 async fn test_json_with_only_start_token() {
-    // Test when only start token is provided (no end token)
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec![">>>FUNCTION:".to_string()],
        end_tokens: vec!["".to_string()], // Empty end token
@@ -232,7 +228,6 @@ async fn test_json_incomplete_wrapper_tokens() {

 #[tokio::test]
 async fn test_json_empty_wrapper_tokens() {
-    // Test with empty wrapper tokens (should behave like default)
    let parser = JsonParser::with_config(TokenConfig {
        start_tokens: vec![],
        end_tokens: vec![],