diff --git a/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py b/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py index c600155..7e80b55 100644 --- a/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +++ b/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py @@ -556,7 +556,13 @@ async def _handle_completions(api: str, request: Request): instance_info.prefiller_idx, instance_info.prefiller_score) released_kv = True - chunk_str = chunk.decode("utf-8").strip() + try: + chunk_str = chunk.decode("utf-8").strip() + except UnicodeDecodeError: + logger.debug( + f"Skipping chunk: {chunk}") + yield chunk + continue if not chunk_str: continue if chunk_str.startswith("data: "): diff --git a/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py b/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py index 880ed69..0694ace 100644 --- a/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +++ b/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py @@ -539,7 +539,13 @@ async def _handle_completions(api: str, request: Request): instance_info.prefiller_idx, instance_info.prefiller_score) released_kv = True - chunk_str = chunk.decode("utf-8").strip() + try: + chunk_str = chunk.decode("utf-8").strip() + except UnicodeDecodeError: + logger.debug( + f"Skipping chunk: {chunk}") + yield chunk + continue if not chunk_str: continue if chunk_str.startswith("data: "):