Rename customer label -> custom label (#10899)
Co-authored-by: Yingchun Lai <laiyingchun@apache.org>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -164,8 +164,8 @@ Please consult the documentation below and [server_args.py](https://github.com/s
|
||||
| `--kv-events-config` | Config in json format for NVIDIA dynamo KV event publishing. Publishing will be enabled if this flag is used. | None |
|
||||
| `--decode-log-interval` | The log interval of decode batch. | 40 |
|
||||
| `--enable-request-time-stats-logging` | Enable per request time stats logging. | False |
|
||||
| `--prompt-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer <value1> <value2> ...' uses custom bucket values (e.g., 'customer 10 50 100 500'). | None |
|
||||
| `--generation-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer <value1> <value2> ...' uses custom bucket values (e.g., 'customer 10 50 100 500'). | None |
|
||||
| `--prompt-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]); 'custom <value1> <value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500'). | None |
|
||||
| `--generation-tokens-buckets` | The buckets rule of generation tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]); 'custom <value1> <value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500'). | None |
|
||||
|
||||
## API related
|
||||
|
||||
|
||||
@@ -235,8 +235,8 @@ class CompletionRequest(BaseModel):
|
||||
# Priority for the request
|
||||
priority: Optional[int] = None
|
||||
|
||||
# For customer metric labels
|
||||
customer_labels: Optional[Dict[str, str]] = None
|
||||
# For custom metric labels
|
||||
custom_labels: Optional[Dict[str, str]] = None
|
||||
|
||||
@field_validator("max_tokens")
|
||||
@classmethod
|
||||
|
||||
@@ -27,10 +27,10 @@ class OpenAIServingBase(ABC):
|
||||
self.tokenizer_manager = tokenizer_manager
|
||||
self.allowed_custom_labels = (
|
||||
set(
|
||||
self.tokenizer_manager.server_args.tokenizer_metrics_allowed_customer_labels
|
||||
self.tokenizer_manager.server_args.tokenizer_metrics_allowed_custom_labels
|
||||
)
|
||||
if isinstance(self.tokenizer_manager.server_args, ServerArgs)
|
||||
and self.tokenizer_manager.server_args.tokenizer_metrics_allowed_customer_labels
|
||||
and self.tokenizer_manager.server_args.tokenizer_metrics_allowed_custom_labels
|
||||
else None
|
||||
)
|
||||
|
||||
@@ -178,14 +178,14 @@ class OpenAIServingBase(ABC):
|
||||
)
|
||||
return json.dumps({"error": error.model_dump()})
|
||||
|
||||
def extract_customer_labels(self, raw_request):
|
||||
def extract_custom_labels(self, raw_request):
|
||||
if (
|
||||
not self.allowed_custom_labels
|
||||
or not self.tokenizer_manager.server_args.tokenizer_metrics_custom_labels_header
|
||||
):
|
||||
return None
|
||||
|
||||
customer_labels = None
|
||||
custom_labels = None
|
||||
header = (
|
||||
self.tokenizer_manager.server_args.tokenizer_metrics_custom_labels_header
|
||||
)
|
||||
@@ -200,9 +200,9 @@ class OpenAIServingBase(ABC):
|
||||
raw_labels = None
|
||||
|
||||
if isinstance(raw_labels, dict):
|
||||
customer_labels = {
|
||||
custom_labels = {
|
||||
label: value
|
||||
for label, value in raw_labels.items()
|
||||
if label in self.allowed_custom_labels
|
||||
}
|
||||
return customer_labels
|
||||
return custom_labels
|
||||
|
||||
@@ -128,8 +128,8 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
else:
|
||||
prompt_kwargs = {"input_ids": processed_messages.prompt_ids}
|
||||
|
||||
# Extract customer labels from raw request headers
|
||||
customer_labels = self.extract_customer_labels(raw_request)
|
||||
# Extract custom labels from raw request headers
|
||||
custom_labels = self.extract_custom_labels(raw_request)
|
||||
|
||||
adapted_request = GenerateReqInput(
|
||||
**prompt_kwargs,
|
||||
@@ -151,7 +151,7 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
rid=request.rid,
|
||||
extra_key=self._compute_extra_key(request),
|
||||
priority=request.priority,
|
||||
customer_labels=customer_labels,
|
||||
custom_labels=custom_labels,
|
||||
)
|
||||
|
||||
return adapted_request, request
|
||||
|
||||
@@ -90,8 +90,8 @@ class OpenAIServingCompletion(OpenAIServingBase):
|
||||
else:
|
||||
prompt_kwargs = {"input_ids": prompt}
|
||||
|
||||
# Extract customer labels from raw request headers
|
||||
customer_labels = self.extract_customer_labels(raw_request)
|
||||
# Extract custom labels from raw request headers
|
||||
custom_labels = self.extract_custom_labels(raw_request)
|
||||
|
||||
adapted_request = GenerateReqInput(
|
||||
**prompt_kwargs,
|
||||
@@ -109,7 +109,7 @@ class OpenAIServingCompletion(OpenAIServingBase):
|
||||
rid=request.rid,
|
||||
extra_key=self._compute_extra_key(request),
|
||||
priority=request.priority,
|
||||
customer_labels=customer_labels,
|
||||
custom_labels=custom_labels,
|
||||
)
|
||||
|
||||
return adapted_request, request
|
||||
|
||||
@@ -143,8 +143,8 @@ class GenerateReqInput:
|
||||
# Image gen grpc migration
|
||||
return_bytes: bool = False
|
||||
|
||||
# For customer metric labels
|
||||
customer_labels: Optional[Dict[str, str]] = None
|
||||
# For custom metric labels
|
||||
custom_labels: Optional[Dict[str, str]] = None
|
||||
|
||||
def contains_mm_input(self) -> bool:
|
||||
return (
|
||||
|
||||
@@ -320,8 +320,8 @@ class TokenizerManager(TokenizerCommunicatorMixin):
|
||||
"model_name": self.server_args.served_model_name,
|
||||
# TODO: Add lora name/path in the future,
|
||||
}
|
||||
if server_args.tokenizer_metrics_allowed_customer_labels:
|
||||
for label in server_args.tokenizer_metrics_allowed_customer_labels:
|
||||
if server_args.tokenizer_metrics_allowed_custom_labels:
|
||||
for label in server_args.tokenizer_metrics_allowed_custom_labels:
|
||||
labels[label] = ""
|
||||
self.metrics_collector = TokenizerMetricsCollector(
|
||||
server_args=server_args,
|
||||
@@ -1633,10 +1633,10 @@ class TokenizerManager(TokenizerCommunicatorMixin):
|
||||
else 0
|
||||
)
|
||||
|
||||
customer_labels = getattr(state.obj, "customer_labels", None)
|
||||
custom_labels = getattr(state.obj, "custom_labels", None)
|
||||
labels = (
|
||||
{**self.metrics_collector.labels, **customer_labels}
|
||||
if customer_labels
|
||||
{**self.metrics_collector.labels, **custom_labels}
|
||||
if custom_labels
|
||||
else self.metrics_collector.labels
|
||||
)
|
||||
if (
|
||||
|
||||
@@ -44,7 +44,7 @@ def generate_buckets(
|
||||
return two_sides_exponential_buckets(float(middle), float(base), int(count))
|
||||
if rule == "default":
|
||||
return sorted(set(default_buckets))
|
||||
assert rule == "customer"
|
||||
assert rule == "custom"
|
||||
return sorted(set([float(x) for x in buckets_rule[1:]]))
|
||||
|
||||
|
||||
|
||||
@@ -213,8 +213,8 @@ class ServerArgs:
|
||||
show_time_cost: bool = False
|
||||
enable_metrics: bool = False
|
||||
enable_metrics_for_all_schedulers: bool = False
|
||||
tokenizer_metrics_custom_labels_header: str = "x-customer-labels"
|
||||
tokenizer_metrics_allowed_customer_labels: Optional[List[str]] = None
|
||||
tokenizer_metrics_custom_labels_header: str = "x-custom-labels"
|
||||
tokenizer_metrics_allowed_custom_labels: Optional[List[str]] = None
|
||||
bucket_time_to_first_token: Optional[List[float]] = None
|
||||
bucket_inter_token_latency: Optional[List[float]] = None
|
||||
bucket_e2e_request_latency: Optional[List[float]] = None
|
||||
@@ -1077,10 +1077,10 @@ class ServerArgs:
|
||||
def _handle_metrics_labels(self):
|
||||
if (
|
||||
not self.tokenizer_metrics_custom_labels_header
|
||||
and self.tokenizer_metrics_allowed_customer_labels
|
||||
and self.tokenizer_metrics_allowed_custom_labels
|
||||
):
|
||||
raise ValueError(
|
||||
"Please set --tokenizer-metrics-custom-labels-header when setting --tokenizer-metrics-allowed-customer-labels."
|
||||
"Please set --tokenizer-metrics-custom-labels-header when setting --tokenizer-metrics-allowed-custom-labels."
|
||||
)
|
||||
|
||||
def _handle_deterministic_inference(self):
|
||||
@@ -1535,16 +1535,16 @@ class ServerArgs:
|
||||
"--tokenizer-metrics-custom-labels-header",
|
||||
type=str,
|
||||
default=ServerArgs.tokenizer_metrics_custom_labels_header,
|
||||
help="Specify the HTTP header for passing customer labels for tokenizer metrics.",
|
||||
help="Specify the HTTP header for passing custom labels for tokenizer metrics.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tokenizer-metrics-allowed-customer-labels",
|
||||
"--tokenizer-metrics-allowed-custom-labels",
|
||||
type=str,
|
||||
nargs="+",
|
||||
default=ServerArgs.tokenizer_metrics_allowed_customer_labels,
|
||||
help="The customer labels allowed for tokenizer metrics. The labels are specified via a dict in "
|
||||
default=ServerArgs.tokenizer_metrics_allowed_custom_labels,
|
||||
help="The custom labels allowed for tokenizer metrics. The labels are specified via a dict in "
|
||||
"'--tokenizer-metrics-custom-labels-header' field in HTTP requests, e.g., {'label1': 'value1', 'label2': "
|
||||
"'value2'} is allowed if '--tokenizer-metrics-allowed-labels label1 label2' is set.",
|
||||
"'value2'} is allowed if '--tokenizer-metrics-allowed-custom-labels label1 label2' is set.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bucket-time-to-first-token",
|
||||
@@ -1576,8 +1576,8 @@ class ServerArgs:
|
||||
bucket_rule = (
|
||||
"Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' "
|
||||
"generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets "
|
||||
"[984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer <value1> "
|
||||
"<value2> ...' uses custom bucket values (e.g., 'customer 10 50 100 500')."
|
||||
"[984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'custom <value1> "
|
||||
"<value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500')."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--prompt-tokens-buckets",
|
||||
@@ -2857,8 +2857,8 @@ class ServerArgs:
|
||||
assert rule in [
|
||||
"tse",
|
||||
"default",
|
||||
"customer",
|
||||
], f"Unsupported {arg_name} rule type: '{rule}'. Must be one of: 'tse', 'default', 'customer'"
|
||||
"custom",
|
||||
], f"Unsupported {arg_name} rule type: '{rule}'. Must be one of: 'tse', 'default', 'custom'"
|
||||
|
||||
if rule == "tse":
|
||||
assert (
|
||||
@@ -2881,20 +2881,20 @@ class ServerArgs:
|
||||
len(buckets_rule) == 1
|
||||
), f"{arg_name} default rule should only have one parameter: ['default'], got {len(buckets_rule)}"
|
||||
|
||||
elif rule == "customer":
|
||||
elif rule == "custom":
|
||||
assert (
|
||||
len(buckets_rule) >= 2
|
||||
), f"{arg_name} customer rule requires at least one bucket value: ['customer', value1, ...]"
|
||||
), f"{arg_name} custom rule requires at least one bucket value: ['custom', value1, ...]"
|
||||
try:
|
||||
bucket_values = [float(x) for x in buckets_rule[1:]]
|
||||
except ValueError:
|
||||
assert False, f"{arg_name} customer rule bucket values must be numeric"
|
||||
assert False, f"{arg_name} custom rule bucket values must be numeric"
|
||||
assert len(set(bucket_values)) == len(
|
||||
bucket_values
|
||||
), f"{arg_name} customer rule bucket values should not contain duplicates"
|
||||
), f"{arg_name} custom rule bucket values should not contain duplicates"
|
||||
assert all(
|
||||
val >= 0 for val in bucket_values
|
||||
), f"{arg_name} customer rule bucket values should be non-negative"
|
||||
), f"{arg_name} custom rule bucket values should be non-negative"
|
||||
|
||||
def adjust_mem_fraction_for_vlm(self, model_config):
|
||||
vision_config = getattr(model_config.hf_config, "vision_config", None)
|
||||
|
||||
@@ -81,23 +81,23 @@ class TestMetricsUtils(unittest.TestCase):
|
||||
expected = two_sides_exponential_buckets(10.0, 2.0, 4)
|
||||
self.assertEqual(result, expected)
|
||||
|
||||
def test_generate_buckets_customer(self):
|
||||
"""Test generate_buckets with customer rule."""
|
||||
def test_generate_buckets_custom(self):
|
||||
"""Test generate_buckets with custom rule."""
|
||||
default_buckets = [1.0, 5.0, 10.0]
|
||||
|
||||
# Test with "customer" rule
|
||||
# Test with "custom" rule
|
||||
result = generate_buckets(
|
||||
["customer", "1.5", "3.2", "7.8", "15.6"], default_buckets
|
||||
["custom", "1.5", "3.2", "7.8", "15.6"], default_buckets
|
||||
)
|
||||
expected = [1.5, 3.2, 7.8, 15.6]
|
||||
self.assertEqual(result, expected)
|
||||
|
||||
def test_generate_buckets_customer_with_integers(self):
|
||||
"""Test generate_buckets with customer rule using integer strings."""
|
||||
def test_generate_buckets_custom_with_integers(self):
|
||||
"""Test generate_buckets with custom rule using integer strings."""
|
||||
default_buckets = [1.0, 5.0, 10.0]
|
||||
|
||||
# Test with integer strings
|
||||
result = generate_buckets(["customer", "1", "5", "10", "50"], default_buckets)
|
||||
result = generate_buckets(["custom", "1", "5", "10", "50"], default_buckets)
|
||||
expected = [1.0, 5.0, 10.0, 50.0]
|
||||
self.assertEqual(result, expected)
|
||||
|
||||
@@ -110,9 +110,9 @@ class TestMetricsUtils(unittest.TestCase):
|
||||
self.assertEqual(result, default_buckets)
|
||||
self.assertIsInstance(result, list)
|
||||
|
||||
# Test customer rule with proper float conversion
|
||||
# Test custom rule with proper float conversion
|
||||
result = generate_buckets(
|
||||
["customer", "100", "50", "10", "5", "1"], default_buckets
|
||||
["custom", "100", "50", "10", "5", "1"], default_buckets
|
||||
)
|
||||
expected = [1.0, 5.0, 10.0, 50.0, 100.0]
|
||||
self.assertEqual(result, expected)
|
||||
|
||||
Reference in New Issue
Block a user