131 lines
1003 KiB
Plaintext
131 lines
1003 KiB
Plaintext
|
|
[2026-04-09 16:37:05 nanoeval] run.py:42 - Step 0/1 system_prompt: None
|
||
|
|
[2026-04-09 16:37:05 nanoeval] task.py:107 - prepare_eval_input: system_prompt=None
|
||
|
|
[2026-04-09 16:37:08 nanoeval] run.py:52 - Step 0/1 completed: {'task_count': 6, 'instance_count': 5920, 'pass_k_by_task': {'gpqa_diamond': 4, 'hmmt2025': 4, 'aime2024': 32, 'aime2025': 32, 'math500': 4, 'minerva': 4}, 'task_sizes': {'gpqa_diamond': 792, 'hmmt2025': 120, 'aime2024': 960, 'aime2025': 960, 'math500': 2000, 'minerva': 1088}, 'output_path': '/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-20260403_091551/iter_0001023-hf/eval_20260409163705/step01_prepared.jsonl'}
|
||
|
|
[2026-04-09 16:37:08 nanoeval] base.py:107 - Initializing Engine (TP=1)...
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
[2026-04-09 16:37:14 nanoeval] server_args.py:1837 - Attention backend not specified. Use fa3 backend by default.
|
||
|
|
[2026-04-09 16:37:14 nanoeval] engine.py:157 - server_args=ServerArgs(model_path='/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-20260403_091551/iter_0001023-hf', tokenizer_path='/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-20260403_091551/iter_0001023-hf', tokenizer_mode='auto', tokenizer_worker_num=1, skip_tokenizer_init=False, load_format='auto', model_loader_extra_config='{}', trust_remote_code=True, context_length=None, is_embedding=False, enable_multimodal=None, revision=None, model_impl='auto', host='127.0.0.1', port=30000, fastapi_root_path='', grpc_mode=False, skip_server_warmup=False, warmups=None, nccl_port=None, checkpoint_engine_wait_weights_before_ready=False, dtype='auto', quantization=None, quantization_param_path=None, kv_cache_dtype='auto', enable_fp32_lm_head=False, modelopt_quant=None, modelopt_checkpoint_restore_path=None, modelopt_checkpoint_save_path=None, modelopt_export_path=None, quantize_and_serve=False, rl_quant_profile=None, mem_fraction_static=0.9, max_running_requests=None, max_queued_requests=None, max_total_tokens=None, chunked_prefill_size=8192, enable_dynamic_chunking=False, max_prefill_tokens=16384, prefill_max_requests=None, schedule_policy='fcfs', enable_priority_scheduling=False, abort_on_priority_when_disabled=False, schedule_low_priority_values_first=False, priority_scheduling_preemption_threshold=10, schedule_conservativeness=1.0, page_size=1, swa_full_tokens_ratio=0.8, disable_hybrid_swa_memory=False, radix_eviction_policy='lru', enable_prefill_delayer=False, prefill_delayer_max_delay_passes=30, prefill_delayer_token_usage_low_watermark=None, prefill_delayer_forward_passes_buckets=None, prefill_delayer_wait_seconds_buckets=None, device='cuda', tp_size=1, pp_size=1, pp_max_micro_batch_size=None, pp_async_batch_depth=0, stream_interval=1, stream_output=False, random_seed=199674123, constrained_json_whitespace_pattern=None, constrained_json_disable_any_whitespace=False, watchdog_timeout=300, soft_watchdog_timeout=None, dist_timeout=None, download_dir=None, model_checksum=None, base_gpu_id=0, gpu_id_step=1, sleep_on_idle=False, custom_sigquit_handler=None, log_level='error', log_level_http=None, log_requests=False, log_requests_level=2, log_requests_format='text', log_requests_target=None, uvicorn_access_log_exclude_prefixes=[], crash_dump_folder=None, show_time_cost=False, enable_metrics=False, enable_metrics_for_all_schedulers=False, tokenizer_metrics_custom_labels_header='x-custom-labels', tokenizer_metrics_allowed_custom_labels=None, extra_metric_labels=None, bucket_time_to_first_token=None, bucket_inter_token_latency=None, bucket_e2e_request_latency=None, collect_tokens_histogram=False, prompt_tokens_buckets=None, generation_tokens_buckets=None, gc_warning_threshold_secs=0.0, decode_log_interval=40, enable_request_time_stats_logging=False, kv_events_config=None, enable_trace=False, otlp_traces_endpoint='localhost:4317', export_metrics_to_file=False, export_metrics_to_file_dir=None, api_key=None, admin_api_key=None, served_model_name='/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-20260403_091551/iter_0001023-hf', weight_version='default', chat_template=None, hf_chat_template_name=None, completion_template=None, file_storage_path='sglang_storage', enable_cache_report=False, reasoning_parser=None, tool_call_parser=None, tool_server=None, sampling_defaults='model', dp_size=8, load_balance_method='round_robin', attn_cp_size=1, moe_dp_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', preferred_sampling_params=None, enable_lora=None, enable_lora_overlap_loading=None, max_lora_rank=None, lora_target_modules=None, lora_paths=None, max_loaded_loras=None, max_loras_per_batch=8, lora_eviction_policy='lru', lora_backend='csgmv', max_lora_chunk_size=16, attention_backend='fa3', decode_attention_backend=None, prefill_attention_backend=None, sampling_backend='flashinfer', grammar_backend='xgrammar', mm_attention_backend=None, fp8_gemm_runner_backend='auto', fp4_gemm_runner_backend='flashinfer_cutlass', nsa_prefill_backend=None, nsa_d
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.60s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:12, 4.33s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.45s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.58s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.59s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.63s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.59s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:04<00:13, 4.63s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:08, 4.44s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:08, 4.44s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:08, 4.46s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:08, 4.45s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:08, 4.38s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:08, 4.44s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:08, 4.33s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:08, 4.46s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:12<00:04, 4.06s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:12<00:04, 4.09s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:12<00:04, 4.10s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:12<00:04, 4.09s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:12<00:04, 4.03s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:12<00:04, 4.09s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:12<00:04, 4.09s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:12<00:04, 4.10s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 2.77s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 2.77s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 2.75s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 2.77s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 2.77s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 2.77s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 2.77s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 2.73s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 3.34s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 3.30s/it]
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 3.34s/it]
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 3.34s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 3.33s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 3.33s/it]
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 3.27s/it]
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:13<00:00, 3.35s/it]
|
||
|
|
|
||
|
|
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:07, 4.88it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:07, 4.88it/s]
Capturing batches (bs=240 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:07, 4.88it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:07, 4.88it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 11%|█ | 4/36 [00:00<00:02, 13.17it/s]
Capturing batches (bs=224 avail_mem=12.82 GB): 11%|█ | 4/36 [00:00<00:02, 13.17it/s]
Capturing batches (bs=216 avail_mem=12.81 GB): 11%|█ | 4/36 [00:00<00:02, 13.17it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=208 avail_mem=12.81 GB): 11%|█ | 4/36 [00:00<00:02, 13.17it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:06, 5.10it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:06, 5.10it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:06, 5.09it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:06, 5.09it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:06, 5.21it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:06, 5.21it/s]
Capturing batches (bs=208 avail_mem=12.81 GB): 19%|█▉ | 7/36 [00:00<00:01, 16.35it/s]
Capturing batches (bs=200 avail_mem=12.80 GB): 19%|█▉ | 7/36 [00:00<00:01, 16.35it/s]
Capturing batches (bs=240 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:06, 5.10it/s]
Capturing batches (bs=240 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:06, 5.09it/s]
Capturing batches (bs=240 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:06, 5.21it/s]
Capturing batches (bs=192 avail_mem=12.79 GB): 19%|█▉ | 7/36 [00:00<00:01, 16.35it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:06, 5.10it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:06, 5.09it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:10, 3.40it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:10, 3.40it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:06, 5.21it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:10, 3.34it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:10, 3.34it/s]
Capturing batches (bs=184 avail_mem=12.79 GB): 19%|█▉ | 7/36 [00:00<00:01, 16.35it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:06, 5.27it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:06, 5.27it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 11%|█ | 4/36 [00:00<00:02, 13.45it/s]
Capturing batches (bs=224 avail_mem=12.82 GB): 11%|█ | 4/36 [00:00<00:02, 13.45it/s]
Capturing batc
|
||
|
|
Capturing batches (bs=24 avail_mem=12.63 GB): 78%|███████▊ | 28/36 [00:01<00:00, 19.88it/s]
Capturing batches (bs=24 avail_mem=12.63 GB): 78%|███████▊ | 28/36 [00:01<00:00, 19.50it/s]
Capturing batches (bs=40 avail_mem=12.65 GB): 78%|███████▊ | 28/36 [00:01<00:00, 19.81it/s]
Capturing batches (bs=32 avail_mem=12.64 GB): 78%|███████▊ | 28/36 [00:01<00:00, 19.81it/s]
Capturing batches (bs=24 avail_mem=12.63 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.71it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.71it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.64it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 17.88it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 17.88it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.59it/s]
Capturing batches (bs=24 avail_mem=12.63 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.86it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.86it/s]
Capturing batches (bs=24 avail_mem=12.63 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.53it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.53it/s]
Capturing batches (bs=24 avail_mem=12.63 GB): 78%|███████▊ | 28/36 [00:01<00:00, 19.81it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.14it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.14it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 86%|████████▌ | 31/36 [00:01<00:00, 17.88it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.14it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.14it/s]
Capturing batches (bs=24 avail_mem=12.63 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.78it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.71it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.78it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.14it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 86%|████████▌ | 31/36 [00:01<00:00, 17.88it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.14it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.86it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 83%|████████▎ | 30/36 [00:01<00:00, 19.53it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.44it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.44it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.14it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 94%|█████████▍| 34/36 [00:01<00:00, 19.30it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 94%|█████████▍| 34/36 [00:01<00:00, 19.30it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.14it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.54it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.54it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 89%|████████
|
||
|
|
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:01<00:00, 18.85it/s]
|
||
|
|
Capturing batches (bs=4 avail_mem=12.60 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.11it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.44it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.44it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.44it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:01<00:00, 18.81it/s]
|
||
|
|
Capturing batches (bs=2 avail_mem=12.59 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.54it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 97%|█████████▋| 35/36 [00:01<00:00, 19.72it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 97%|█████████▋| 35/36 [00:01<00:00, 19.72it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.11it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.44it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 97%|█████████▋| 35/36 [00:01<00:00, 19.80it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 97%|█████████▋| 35/36 [00:01<00:00, 19.80it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:02<00:00, 18.00it/s]
|
||
|
|
Capturing batches (bs=2 avail_mem=12.59 GB): 97%|█████████▋| 35/36 [00:01<00:00, 19.54it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 97%|█████████▋| 35/36 [00:01<00:00, 19.54it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 89%|████████▉ | 32/36 [00:01<00:00, 18.44it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:01<00:00, 19.03it/s]
|
||
|
|
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:02<00:00, 17.80it/s]
|
||
|
|
Capturing batches (bs=2 avail_mem=12.59 GB): 97%|█████████▋| 35/36 [00:01<00:00, 19.70it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 97%|█████████▋| 35/36 [00:01<00:00, 19.70it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:01<00:00, 18.83it/s]
|
||
|
|
Inference: 0%| | 0/5920 [00:00<?, ?it/s][2026-04-09 16:37:52] Worker Error: Requested token count exceeds the model's maximum context length of 32768 tokens. You requested a total of 32806 tokens: 2806 tokens from the input messages and 30000 tokens for the completion. Please reduce the number of tokens in the input messages or the completion to fit within the limit.
|
||
|
|
[2026-04-09 16:37:52] Worker Error: Requested token count exceeds the model's maximum context length of 32768 tokens. You requested a total of 32806 tokens: 2806 tokens from the input messages and 30000 tokens for the completion. Please reduce the number of tokens in the input messages or the completion to fit within the limit.
|
||
|
|
[2026-04-09 16:37:52] Worker Error: Requested token count exceeds the model's maximum context length of 32768 tokens. You requested a total of 32806 tokens: 2806 tokens from the input messages and 30000 tokens for the completion. Please reduce the number of tokens in the input messages or the completion to fit within the limit.
|
||
|
|
[2026-04-09 16:37:52] Worker Error: Requested token count exceeds the model's maximum context length of 32768 tokens. You requested a total of 32806 tokens: 2806 tokens from the input messages and 30000 tokens for the completion. Please reduce the number of tokens in the input messages or the completion to fit within the limit.
|
||
|
|
Inference: 0%| | 1/5920 [00:07<12:31:32, 7.62s/it]
Inference: 0%| | 1/5920 [00:07<12:31:32, 7.62s/it, tok/s=132.3]
Inference: 0%| | 2/5920 [00:07<12:31:24, 7.62s/it, tok/s=263.2]
Inference: 0%| | 3/5920 [00:07<12:31:16, 7.62s/it, tok/s=394.6]
Inference: 0%| | 4/5920 [00:07<12:31:09, 7.62s/it, tok/s=524.7]
Inference: 0%| | 5/5920 [00:08<2:13:37, 1.36s/it, tok/s=524.7]
Inference: 0%| | 5/5920 [00:08<2:13:37, 1.36s/it, tok/s=590.0]
Inference: 0%| | 6/5920 [00:08<2:13:36, 1.36s/it, tok/s=707.7]
Inference: 0%| | 7/5920 [00:08<1:26:13, 1.14it/s, tok/s=707.7]
Inference: 0%| | 7/5920 [00:08<1:26:13, 1.14it/s, tok/s=825.7]
Inference: 0%| | 8/5920 [00:09<1:14:29, 1.32it/s, tok/s=825.7]
Inference: 0%| | 8/5920 [00:09<1:14:29, 1.32it/s, tok/s=921.2]
Inference: 0%| | 9/5920 [00:10<1:22:09, 1.20it/s, tok/s=921.2]
Inference: 0%| | 9/5920 [00:10<1:22:09, 1.20it/s, tok/s=947.6]
Inference: 0%| | 10/5920 [00:11<1:38:14, 1.00it/s, tok/s=947.6]
Inference: 0%| | 10/5920 [00:11<1:38:14, 1.00it/s, tok/s=949.0]
Inference: 0%| | 11/5920 [00:11<1:38:13, 1.00it/s, tok/s=1064.2]
Inference: 0%| | 12/5920 [00:12<1:11:52, 1.37it/s, tok/s=1064.2]
Inference: 0%| | 12/5920 [00:12<1:11:52, 1.37it/s, tok/s=1130.3]
Inference: 0%| | 13/5920 [00:13<1:08:59, 1.43it/s, tok/s=1130.3]
Inference: 0%| | 13/5920 [00:13<1:08:59, 1.43it/s, tok/s=1194.9]
Inference: 0%| | 14/5920 [00:13<1:04:24, 1.53it/s, tok/s=1194.9]
Inference: 0%| | 14/5920 [00:13<1:04:24, 1.53it/s, tok/s=1265.9]
Inference: 0%| | 15/5920 [00:13<50:17, 1.96it/s, tok/s=1265.9]
Inference: 0%| | 15/5920 [00:13<50:17, 1.96it/s, tok/s=1380.5]
Inference: 0%| | 16/5920 [00:14<50:03, 1.97it/s, tok/s=1380.5]
Inference: 0%| | 16/5920 [00:14<50:03, 1.97it/s, tok/s=1449.4]
Inference: 0%| | 17/5920 [00:14<45:13, 2.18it/s, tok/s=1449.4]
Inference: 0%| | 17/5920 [00:14<45:13, 2.18it/s, tok/s=1547.4]
Inference: 0%| | 18/5920 [00:16<1:34:17, 1.04it/s, tok/s=1547.4]
Inference: 0%| | 18/5920 [00:16<1:34:17, 1.04it/s, tok/s=1461.7]
Inference: 0%| | 19/5920 [00:16<1:11:38, 1.37it/s, tok/s=1461.7]
Inference: 0%| | 19/5920 [00:16<1:11:38, 1.37it/s, tok/s=1573.3]
Inference: 0%| | 20/5920 [00:19<1:55:05, 1.17s/it, tok/s=1573.3]
Inference: 0%| | 20/5920 [00:19<1:55:05, 1.17s/it, tok/s=1505.7]
Inference: 0%| | 21/5920 [00:19<1:34:20, 1.04it/s, tok/s=1505.7]
Inference: 0%| | 21/5920 [00:19<1:34:20, 1.04it/s, tok/s=1591.1]
Inference: 0%| | 22/5920 [00:19<1:34:19, 1.04it/s, tok/s=1710.0]
Inference: 0%| | 23/5920 [00:20<1:08:46, 1.43it/s, tok/s=1710.0]
Inference: 0%| | 23/5920 [00:20<1:08:46, 1.43it/s, tok/s=1758.5]
Inference: 0%| | 24/5920 [00:21<1:11:46, 1.37it/s, tok/s=1758.5]
Inference: 0%| | 24/5920 [00:21<1:11:46, 1.37it/s, tok/s=1800.6]
Inference: 0%| | 25/5920 [00:22<1:22:44, 1.19it/s, tok/s=1800.6]
Inference: 0%| | 25/5920 [00:22<1:22:44, 1.19it/s, tok/s=1812.3]
Inference: 0%| | 26/5920 [00:22<1:06:57, 1.47it/s, tok/s=1812.3]
Inference: 0%| | 26/5920 [00:22<1:06:57, 1.47it/s, tok/s=1897.3]
Inference: 0%| | 27/5920 [00:24<1:42:04, 1.04s/it, tok/s=1897.3]
Inference: 0%| | 27/5920 [00:24<1:42:04, 1.04s/it, tok/s=1846.8]
Inference: 0%| | 28/5920 [00:26<2:15:53, 1.38s/it, tok/s=1846.8]
Inference: 0%| | 28/5920 [00:26<2:15:53, 1.38s/it, tok/s=1793.1]
Inference: 0%| | 29/5920 [00:27<1:57:21, 1.20s/it, tok/s=1793.1]
Inference: 0%| | 29/5920 [00:27<1:57:21, 1.20s/it, tok/s=1848.7]
Inference: 1%| | 30/5920 [00:28<1:57:58, 1.20s/it, tok/s=1848.7]
Inference: 1%| | 30/5920 [00:28<1:57:58, 1.20s/i
|
||
|
|
Timeout during comparison
|
||
|
|
Timeout during comparison
|
||
|
|
Timeout during comparison
|
||
|
|
Timeout during comparison
|
||
|
|
{"stage": "all", "step01": {"task_count": 6, "instance_count": 5920, "pass_k_by_task": {"gpqa_diamond": 4, "hmmt2025": 4, "aime2024": 32, "aime2025": 32, "math500": 4, "minerva": 4}, "task_sizes": {"gpqa_diamond": 792, "hmmt2025": 120, "aime2024": 960, "aime2025": 960, "math500": 2000, "minerva": 1088}, "output_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-20260403_091551/iter_0001023-hf/eval_20260409163705/step01_prepared.jsonl"}, "step02": {"backend": "offline", "input_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-20260403_091551/iter_0001023-hf/eval_20260409163705/step01_prepared.jsonl", "output_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-20260403_091551/iter_0001023-hf/eval_20260409163705/step02_inference.jsonl"}, "step03": {"input_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-20260403_091551/iter_0001023-hf/eval_20260409163705/step02_inference.jsonl", "score_output_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-20260403_091551/iter_0001023-hf/eval_20260409163705/step03_score.jsonl", "final_eval_output_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-20260403_091551/iter_0001023-hf/eval_20260409163705/step03_final_eval.jsonl", "metrics": {"gpqa_diamond": {"avg_k": 0.5812182741116751, "pass_k": 0.7614213197969543, "avg_total_tokens": 10732.243654822336, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "hmmt2025": {"avg_k": 0.375, "pass_k": 0.5333333333333333, "avg_total_tokens": 18450.008333333335, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "aime2024": {"avg_k": 0.7020833333333333, "pass_k": 0.9333333333333333, "avg_total_tokens": 13840.913541666667, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "aime2025": {"avg_k": 0.6010416666666667, "pass_k": 0.8666666666666667, "avg_total_tokens": 15299.15, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "math500": {"avg_k": 0.952, "pass_k": 0.98, "avg_total_tokens": 4403.267, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "minerva": {"avg_k": 0.484375, "pass_k": 0.5661764705882353, "avg_total_tokens": 6378.409007352941, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "overall": {"avg_k": 0.7074036511156186, "pass_k": 0.8158640226628895, "avg_total_tokens": 9194.001521298174, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}}}}
|
||
|
|
Traceback (most recent call last):
|
||
|
|
File "/usr/lib/python3.12/multiprocessing/resource_tracker.py", line 239, in main
|
||
|
|
cache[rtype].remove(name)
|
||
|
|
KeyError: '/mp-e49dr1x4'
|