diff --git a/docs/advanced_features/server_arguments.md b/docs/advanced_features/server_arguments.md index b88f3d8a7..584cebe6c 100644 --- a/docs/advanced_features/server_arguments.md +++ b/docs/advanced_features/server_arguments.md @@ -164,8 +164,8 @@ Please consult the documentation below and [server_args.py](https://github.com/s | `--kv-events-config` | Config in json format for NVIDIA dynamo KV event publishing. Publishing will be enabled if this flag is used. | None | | `--decode-log-interval` | The log interval of decode batch. | 40 | | `--enable-request-time-stats-logging` | Enable per request time stats logging. | False | -| `--prompt-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse ' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer ...' uses custom bucket values (e.g., 'customer 10 50 100 500'). | None | -| `--generation-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse ' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer ...' uses custom bucket values (e.g., 'customer 10 50 100 500'). | None | +| `--prompt-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]); 'custom <value1> <value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500'). | None | +| `--generation-tokens-buckets` | The buckets rule of generation tokens. 
Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]); 'custom <value1> <value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500'). | None | ## API related diff --git a/python/sglang/srt/entrypoints/openai/protocol.py b/python/sglang/srt/entrypoints/openai/protocol.py index ec6ce7f99..fc95116f8 100644 --- a/python/sglang/srt/entrypoints/openai/protocol.py +++ b/python/sglang/srt/entrypoints/openai/protocol.py @@ -235,8 +235,8 @@ class CompletionRequest(BaseModel): # Priority for the request priority: Optional[int] = None - # For customer metric labels - customer_labels: Optional[Dict[str, str]] = None + # For custom metric labels + custom_labels: Optional[Dict[str, str]] = None @field_validator("max_tokens") @classmethod diff --git a/python/sglang/srt/entrypoints/openai/serving_base.py b/python/sglang/srt/entrypoints/openai/serving_base.py index f0038bdea..a57b71d8f 100644 --- a/python/sglang/srt/entrypoints/openai/serving_base.py +++ b/python/sglang/srt/entrypoints/openai/serving_base.py @@ -27,10 +27,10 @@ class OpenAIServingBase(ABC): self.tokenizer_manager = tokenizer_manager self.allowed_custom_labels = ( set( - self.tokenizer_manager.server_args.tokenizer_metrics_allowed_customer_labels + self.tokenizer_manager.server_args.tokenizer_metrics_allowed_custom_labels ) if isinstance(self.tokenizer_manager.server_args, ServerArgs) - and self.tokenizer_manager.server_args.tokenizer_metrics_allowed_customer_labels + and self.tokenizer_manager.server_args.tokenizer_metrics_allowed_custom_labels else None ) @@ -178,14 +178,14 @@ class OpenAIServingBase(ABC): ) return json.dumps({"error": error.model_dump()}) - def extract_customer_labels(self, raw_request): + def extract_custom_labels(self, raw_request): if ( not self.allowed_custom_labels or not 
self.tokenizer_manager.server_args.tokenizer_metrics_custom_labels_header ): return None - customer_labels = None + custom_labels = None header = ( self.tokenizer_manager.server_args.tokenizer_metrics_custom_labels_header ) @@ -200,9 +200,9 @@ class OpenAIServingBase(ABC): raw_labels = None if isinstance(raw_labels, dict): - customer_labels = { + custom_labels = { label: value for label, value in raw_labels.items() if label in self.allowed_custom_labels } - return customer_labels + return custom_labels diff --git a/python/sglang/srt/entrypoints/openai/serving_chat.py b/python/sglang/srt/entrypoints/openai/serving_chat.py index df40ebead..90572be6c 100644 --- a/python/sglang/srt/entrypoints/openai/serving_chat.py +++ b/python/sglang/srt/entrypoints/openai/serving_chat.py @@ -128,8 +128,8 @@ class OpenAIServingChat(OpenAIServingBase): else: prompt_kwargs = {"input_ids": processed_messages.prompt_ids} - # Extract customer labels from raw request headers - customer_labels = self.extract_customer_labels(raw_request) + # Extract custom labels from raw request headers + custom_labels = self.extract_custom_labels(raw_request) adapted_request = GenerateReqInput( **prompt_kwargs, @@ -151,7 +151,7 @@ class OpenAIServingChat(OpenAIServingBase): rid=request.rid, extra_key=self._compute_extra_key(request), priority=request.priority, - customer_labels=customer_labels, + custom_labels=custom_labels, ) return adapted_request, request diff --git a/python/sglang/srt/entrypoints/openai/serving_completions.py b/python/sglang/srt/entrypoints/openai/serving_completions.py index e394b733b..b065984aa 100644 --- a/python/sglang/srt/entrypoints/openai/serving_completions.py +++ b/python/sglang/srt/entrypoints/openai/serving_completions.py @@ -90,8 +90,8 @@ class OpenAIServingCompletion(OpenAIServingBase): else: prompt_kwargs = {"input_ids": prompt} - # Extract customer labels from raw request headers - customer_labels = self.extract_customer_labels(raw_request) + # Extract custom labels from 
raw request headers + custom_labels = self.extract_custom_labels(raw_request) adapted_request = GenerateReqInput( **prompt_kwargs, @@ -109,7 +109,7 @@ class OpenAIServingCompletion(OpenAIServingBase): rid=request.rid, extra_key=self._compute_extra_key(request), priority=request.priority, - customer_labels=customer_labels, + custom_labels=custom_labels, ) return adapted_request, request diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py index 5d42fde0d..73a7ce0e2 100644 --- a/python/sglang/srt/managers/io_struct.py +++ b/python/sglang/srt/managers/io_struct.py @@ -143,8 +143,8 @@ class GenerateReqInput: # Image gen grpc migration return_bytes: bool = False - # For customer metric labels - customer_labels: Optional[Dict[str, str]] = None + # For custom metric labels + custom_labels: Optional[Dict[str, str]] = None def contains_mm_input(self) -> bool: return ( diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index dd341aa3a..cc4b8c038 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -320,8 +320,8 @@ class TokenizerManager(TokenizerCommunicatorMixin): "model_name": self.server_args.served_model_name, # TODO: Add lora name/path in the future, } - if server_args.tokenizer_metrics_allowed_customer_labels: - for label in server_args.tokenizer_metrics_allowed_customer_labels: + if server_args.tokenizer_metrics_allowed_custom_labels: + for label in server_args.tokenizer_metrics_allowed_custom_labels: labels[label] = "" self.metrics_collector = TokenizerMetricsCollector( server_args=server_args, @@ -1633,10 +1633,10 @@ class TokenizerManager(TokenizerCommunicatorMixin): else 0 ) - customer_labels = getattr(state.obj, "customer_labels", None) + custom_labels = getattr(state.obj, "custom_labels", None) labels = ( - {**self.metrics_collector.labels, **customer_labels} - if customer_labels + 
{**self.metrics_collector.labels, **custom_labels} + if custom_labels else self.metrics_collector.labels ) if ( diff --git a/python/sglang/srt/metrics/utils.py b/python/sglang/srt/metrics/utils.py index 73c0b4e73..4dc498df7 100644 --- a/python/sglang/srt/metrics/utils.py +++ b/python/sglang/srt/metrics/utils.py @@ -44,7 +44,7 @@ def generate_buckets( return two_sides_exponential_buckets(float(middle), float(base), int(count)) if rule == "default": return sorted(set(default_buckets)) - assert rule == "customer" + assert rule == "custom" return sorted(set([float(x) for x in buckets_rule[1:]])) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index d5d1cecf2..020f71f11 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -213,8 +213,8 @@ class ServerArgs: show_time_cost: bool = False enable_metrics: bool = False enable_metrics_for_all_schedulers: bool = False - tokenizer_metrics_custom_labels_header: str = "x-customer-labels" - tokenizer_metrics_allowed_customer_labels: Optional[List[str]] = None + tokenizer_metrics_custom_labels_header: str = "x-custom-labels" + tokenizer_metrics_allowed_custom_labels: Optional[List[str]] = None bucket_time_to_first_token: Optional[List[float]] = None bucket_inter_token_latency: Optional[List[float]] = None bucket_e2e_request_latency: Optional[List[float]] = None @@ -1077,10 +1077,10 @@ class ServerArgs: def _handle_metrics_labels(self): if ( not self.tokenizer_metrics_custom_labels_header - and self.tokenizer_metrics_allowed_customer_labels + and self.tokenizer_metrics_allowed_custom_labels ): raise ValueError( - "Please set --tokenizer-metrics-custom-labels-header when setting --tokenizer-metrics-allowed-customer-labels." + "Please set --tokenizer-metrics-custom-labels-header when setting --tokenizer-metrics-allowed-custom-labels." 
) def _handle_deterministic_inference(self): @@ -1535,16 +1535,16 @@ class ServerArgs: "--tokenizer-metrics-custom-labels-header", type=str, default=ServerArgs.tokenizer_metrics_custom_labels_header, - help="Specify the HTTP header for passing customer labels for tokenizer metrics.", + help="Specify the HTTP header for passing custom labels for tokenizer metrics.", ) parser.add_argument( - "--tokenizer-metrics-allowed-customer-labels", + "--tokenizer-metrics-allowed-custom-labels", type=str, nargs="+", - default=ServerArgs.tokenizer_metrics_allowed_customer_labels, - help="The customer labels allowed for tokenizer metrics. The labels are specified via a dict in " + default=ServerArgs.tokenizer_metrics_allowed_custom_labels, + help="The custom labels allowed for tokenizer metrics. The labels are specified via a dict in " "'--tokenizer-metrics-custom-labels-header' field in HTTP requests, e.g., {'label1': 'value1', 'label2': " - "'value2'} is allowed if '--tokenizer-metrics-allowed-labels label1 label2' is set.", + "'value2'} is allowed if '--tokenizer-metrics-allowed-custom-labels label1 label2' is set.", ) parser.add_argument( "--bucket-time-to-first-token", @@ -1576,8 +1576,8 @@ class ServerArgs: bucket_rule = ( "Supports 3 rule types: 'default' uses predefined buckets; 'tse ' " "generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets " - "[984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer " - " ...' uses custom bucket values (e.g., 'customer 10 50 100 500')." + "[984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'custom " + " ...' uses custom bucket values (e.g., 'custom 10 50 100 500')." ) parser.add_argument( "--prompt-tokens-buckets", @@ -2857,8 +2857,8 @@ class ServerArgs: assert rule in [ "tse", "default", - "customer", - ], f"Unsupported {arg_name} rule type: '{rule}'. 
Must be one of: 'tse', 'default', 'customer'" + "custom", + ], f"Unsupported {arg_name} rule type: '{rule}'. Must be one of: 'tse', 'default', 'custom'" if rule == "tse": assert ( @@ -2881,20 +2881,20 @@ class ServerArgs: len(buckets_rule) == 1 ), f"{arg_name} default rule should only have one parameter: ['default'], got {len(buckets_rule)}" - elif rule == "customer": + elif rule == "custom": assert ( len(buckets_rule) >= 2 - ), f"{arg_name} customer rule requires at least one bucket value: ['customer', value1, ...]" + ), f"{arg_name} custom rule requires at least one bucket value: ['custom', value1, ...]" try: bucket_values = [float(x) for x in buckets_rule[1:]] except ValueError: - assert False, f"{arg_name} customer rule bucket values must be numeric" + assert False, f"{arg_name} custom rule bucket values must be numeric" assert len(set(bucket_values)) == len( bucket_values - ), f"{arg_name} customer rule bucket values should not contain duplicates" + ), f"{arg_name} custom rule bucket values should not contain duplicates" assert all( val >= 0 for val in bucket_values - ), f"{arg_name} customer rule bucket values should be non-negative" + ), f"{arg_name} custom rule bucket values should be non-negative" def adjust_mem_fraction_for_vlm(self, model_config): vision_config = getattr(model_config.hf_config, "vision_config", None) diff --git a/test/srt/test_metrics_utils.py b/test/srt/test_metrics_utils.py index 4d33ad950..1a93a75e0 100644 --- a/test/srt/test_metrics_utils.py +++ b/test/srt/test_metrics_utils.py @@ -81,23 +81,23 @@ class TestMetricsUtils(unittest.TestCase): expected = two_sides_exponential_buckets(10.0, 2.0, 4) self.assertEqual(result, expected) - def test_generate_buckets_customer(self): - """Test generate_buckets with customer rule.""" + def test_generate_buckets_custom(self): + """Test generate_buckets with custom rule.""" default_buckets = [1.0, 5.0, 10.0] - # Test with "customer" rule + # Test with "custom" rule result = generate_buckets( - 
["customer", "1.5", "3.2", "7.8", "15.6"], default_buckets + ["custom", "1.5", "3.2", "7.8", "15.6"], default_buckets ) expected = [1.5, 3.2, 7.8, 15.6] self.assertEqual(result, expected) - def test_generate_buckets_customer_with_integers(self): - """Test generate_buckets with customer rule using integer strings.""" + def test_generate_buckets_custom_with_integers(self): + """Test generate_buckets with custom rule using integer strings.""" default_buckets = [1.0, 5.0, 10.0] # Test with integer strings - result = generate_buckets(["customer", "1", "5", "10", "50"], default_buckets) + result = generate_buckets(["custom", "1", "5", "10", "50"], default_buckets) expected = [1.0, 5.0, 10.0, 50.0] self.assertEqual(result, expected) @@ -110,9 +110,9 @@ class TestMetricsUtils(unittest.TestCase): self.assertEqual(result, default_buckets) self.assertIsInstance(result, list) - # Test customer rule with proper float conversion + # Test custom rule with proper float conversion result = generate_buckets( - ["customer", "100", "50", "10", "5", "1"], default_buckets + ["custom", "100", "50", "10", "5", "1"], default_buckets ) expected = [1.0, 5.0, 10.0, 50.0, 100.0] self.assertEqual(result, expected)