127 lines
998 KiB
Plaintext
127 lines
998 KiB
Plaintext
|
|
[2026-04-17 05:17:59 nanoeval] run.py:42 - Step 0/1 system_prompt: None
|
||
|
|
[2026-04-17 05:17:59 nanoeval] task.py:107 - prepare_eval_input: system_prompt=None
|
||
|
|
[2026-04-17 05:18:02 nanoeval] run.py:52 - Step 0/1 completed: {'task_count': 6, 'instance_count': 5920, 'pass_k_by_task': {'gpqa_diamond': 4, 'hmmt2025': 4, 'aime2024': 32, 'aime2025': 32, 'math500': 4, 'minerva': 4}, 'task_sizes': {'gpqa_diamond': 792, 'hmmt2025': 120, 'aime2024': 960, 'aime2025': 960, 'math500': 2000, 'minerva': 1088}, 'output_path': '/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-muon-20260413_090005/iter_0001023-hf/eval_20260417051759/step01_prepared.jsonl'}
|
||
|
|
[2026-04-17 05:18:03 nanoeval] base.py:107 - Initializing Engine (TP=1)...
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
[2026-04-17 05:18:09 nanoeval] server_args.py:1837 - Attention backend not specified. Use fa3 backend by default.
|
||
|
|
[2026-04-17 05:18:09 nanoeval] engine.py:157 - server_args=ServerArgs(model_path='/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-muon-20260413_090005/iter_0001023-hf', tokenizer_path='/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-muon-20260413_090005/iter_0001023-hf', tokenizer_mode='auto', tokenizer_worker_num=1, skip_tokenizer_init=False, load_format='auto', model_loader_extra_config='{}', trust_remote_code=True, context_length=None, is_embedding=False, enable_multimodal=None, revision=None, model_impl='auto', host='127.0.0.1', port=30000, fastapi_root_path='', grpc_mode=False, skip_server_warmup=False, warmups=None, nccl_port=None, checkpoint_engine_wait_weights_before_ready=False, dtype='auto', quantization=None, quantization_param_path=None, kv_cache_dtype='auto', enable_fp32_lm_head=False, modelopt_quant=None, modelopt_checkpoint_restore_path=None, modelopt_checkpoint_save_path=None, modelopt_export_path=None, quantize_and_serve=False, rl_quant_profile=None, mem_fraction_static=0.9, max_running_requests=None, max_queued_requests=None, max_total_tokens=None, chunked_prefill_size=8192, enable_dynamic_chunking=False, max_prefill_tokens=16384, prefill_max_requests=None, schedule_policy='fcfs', enable_priority_scheduling=False, abort_on_priority_when_disabled=False, schedule_low_priority_values_first=False, priority_scheduling_preemption_threshold=10, schedule_conservativeness=1.0, page_size=1, swa_full_tokens_ratio=0.8, disable_hybrid_swa_memory=False, radix_eviction_policy='lru', enable_prefill_delayer=False, prefill_delayer_max_delay_passes=30, prefill_delayer_token_usage_low_watermark=None, prefill_delayer_forward_passes_buckets=None, prefill_delayer_wait_seconds_buckets=None, device='cuda', tp_size=1, pp_size=1, pp_max_micro_batch_size=None, pp_async_batch_depth=0, stream_interval=1, stream_output=False, random_seed=298022117, constrained_json_whitespace_pattern=None, constrained_json_disable_any_whitespace=False, watchdog_timeout=300, soft_watchdog_timeout=None, dist_timeout=None, download_dir=None, model_checksum=None, base_gpu_id=0, gpu_id_step=1, sleep_on_idle=False, custom_sigquit_handler=None, log_level='error', log_level_http=None, log_requests=False, log_requests_level=2, log_requests_format='text', log_requests_target=None, uvicorn_access_log_exclude_prefixes=[], crash_dump_folder=None, show_time_cost=False, enable_metrics=False, enable_metrics_for_all_schedulers=False, tokenizer_metrics_custom_labels_header='x-custom-labels', tokenizer_metrics_allowed_custom_labels=None, extra_metric_labels=None, bucket_time_to_first_token=None, bucket_inter_token_latency=None, bucket_e2e_request_latency=None, collect_tokens_histogram=False, prompt_tokens_buckets=None, generation_tokens_buckets=None, gc_warning_threshold_secs=0.0, decode_log_interval=40, enable_request_time_stats_logging=False, kv_events_config=None, enable_trace=False, otlp_traces_endpoint='localhost:4317', export_metrics_to_file=False, export_metrics_to_file_dir=None, api_key=None, admin_api_key=None, served_model_name='/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-muon-20260413_090005/iter_0001023-hf', weight_version='default', chat_template=None, hf_chat_template_name=None, completion_template=None, file_storage_path='sglang_storage', enable_cache_report=False, reasoning_parser=None, tool_call_parser=None, tool_server=None, sampling_defaults='model', dp_size=8, load_balance_method='round_robin', attn_cp_size=1, moe_dp_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', preferred_sampling_params=None, enable_lora=None, enable_lora_overlap_loading=None, max_lora_rank=None, lora_target_modules=None, lora_paths=None, max_loaded_loras=None, max_loras_per_batch=8, lora_eviction_policy='lru', lora_backend='csgmv', max_lora_chunk_size=16, attention_backend='fa3', decode_attention_backend=None, prefill_attention_backend=None, sampling_backend='flashinfer', grammar_backend='xgrammar', mm_attention_backend=None, fp8_gemm_runner_backend='auto', fp4_gemm_runner_backend='flashinfer_cutlass', nsa_prefill_back
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
|
||
|
|
<frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:03<00:10, 3.48s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:03<00:10, 3.37s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:03<00:10, 3.61s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:03<00:10, 3.48s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:03<00:10, 3.47s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:03<00:10, 3.45s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:03<00:10, 3.49s/it]
|
||
|
|
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:03<00:10, 3.45s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:06<00:06, 3.24s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:06<00:06, 3.18s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:06<00:06, 3.17s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:06<00:06, 3.19s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:06<00:06, 3.14s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:06<00:06, 3.18s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:06<00:06, 3.19s/it]
|
||
|
|
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:06<00:06, 3.17s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:09<00:03, 3.10s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:09<00:03, 3.09s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:09<00:03, 3.10s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:09<00:03, 3.07s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:09<00:03, 3.10s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:09<00:03, 3.09s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:09<00:03, 3.09s/it]
|
||
|
|
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:09<00:03, 3.12s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.15s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.56s/it]
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.17s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.55s/it]
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.19s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.18s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.19s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.56s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.55s/it]
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.17s/it]
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.56s/it]
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.53s/it]
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.18s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.55s/it]
|
||
|
|
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.19s/it]
|
||
|
|
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.56s/it]
|
||
|
|
|
||
|
|
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:07, 4.98it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:07, 4.98it/s]
Capturing batches (bs=240 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:07, 4.98it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:07, 4.98it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 11%|█ | 4/36 [00:00<00:02, 13.52it/s]
Capturing batches (bs=224 avail_mem=12.82 GB): 11%|█ | 4/36 [00:00<00:02, 13.52it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=216 avail_mem=12.81 GB): 11%|█ | 4/36 [00:00<00:02, 13.52it/s]
Capturing batches (bs=208 avail_mem=12.81 GB): 11%|█ | 4/36 [00:00<00:02, 13.52it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:07, 4.98it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:07, 4.98it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:07, 4.99it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:07, 4.99it/s]
Capturing batches (bs=208 avail_mem=12.81 GB): 19%|█▉ | 7/36 [00:00<00:01, 16.89it/s]
Capturing batches (bs=200 avail_mem=12.80 GB): 19%|█▉ | 7/36 [00:00<00:01, 16.89it/s]
Capturing batches (bs=240 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:07, 4.98it/s]
Capturing batches (bs=240 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:07, 4.99it/s]
Capturing batches (bs=192 avail_mem=12.79 GB): 19%|█▉ | 7/36 [00:00<00:01, 16.89it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:06, 5.20it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:06, 5.20it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:07, 4.98it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 3%|▎ | 1/36 [00:00<00:06, 5.38it/s]
Capturing batches (bs=248 avail_mem=12.84 GB): 3%|▎ | 1/36 [00:00<00:06, 5.38it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:07, 4.99it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=184 avail_mem=12.79 GB): 19%|█▉ | 7/36 [00:00<00:01, 16.89it/s]
Capturing batches (bs=240 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:06, 5.20it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 11%|█ | 4/36 [00:00<00:02, 13.48it/s]
Capturing batches (bs=224 avail_mem=12.82 GB): 11%|█ | 4/36 [00:00<00:02, 13.48it/s]
Capturing batches (bs=240 avail_mem=12.83 GB): 3%|▎ | 1/36 [00:00<00:06, 5.38it/s]
Capturing batches (bs=232 avail_mem=12.83 GB): 11%|█ | 4/36 [00:00<00:02, 13.54it/s]
Capturing batches (bs=224 avail_mem=12.82 GB): 11%|█ | 4/36 [00:00<00:02, 13.54it/s]
Capturing batches (bs=184 avail_mem=12.79 GB): 28%|██▊ | 10/36 [00:00<00:01, 18.90it/s]
Capturing batches (bs=176 avail_mem=12.78 GB): 28%|██▊ | 10/36 [00:00<00:01, 18.90it/s]
0%| | 0/36 [00:00<?, ?it/s]
Capturing batches (bs=256 avail_mem=13.09 GB): 0%| | 0/36 [00:00<?, ?it/s]
Capt
|
||
|
|
Capturing batches (bs=16 avail_mem=12.62 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.22it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.28it/s]
Capturing batches (bs=24 avail_mem=12.63 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.35it/s]
Capturing batches (bs=24 avail_mem=12.63 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.69it/s]
Capturing batches (bs=64 avail_mem=12.67 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.60it/s]
Capturing batches (bs=56 avail_mem=12.66 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.60it/s]
Capturing batches (bs=64 avail_mem=12.67 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.51it/s]
Capturing batches (bs=56 avail_mem=12.66 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.51it/s]
Capturing batches (bs=64 avail_mem=12.67 GB): 61%|██████ | 22/36 [00:01<00:00, 20.51it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.35it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.69it/s]
Capturing batches (bs=48 avail_mem=12.65 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.60it/s]
Capturing batches (bs=48 avail_mem=12.65 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.51it/s]
Capturing batches (bs=64 avail_mem=12.67 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.47it/s]
Capturing batches (bs=56 avail_mem=12.66 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.47it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.70it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.70it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.75it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.75it/s]
Capturing batches (bs=40 avail_mem=12.65 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.60it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.70it/s]
Capturing batches (bs=40 avail_mem=12.65 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.51it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.75it/s]
Capturing batches (bs=48 avail_mem=12.65 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.47it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.82it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.82it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 19.11it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 19.11it/s]
Capturing batches (bs=40 avail_mem=12.65 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.52it/s]
Capturing batches (bs=32 avail_mem=12.64 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.52it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.70it/s]
Capturing batches (bs=40 avail_mem=12.65 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.40it/s]
Capturing batches (bs=32 avail_mem=12.64 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.40it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.75it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.82it/s]
Capturing batches (bs=40 avail_mem=12.65 GB): 69%|██████▉ | 25/36 [00:01<00:00, 20.47it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 86%|████████▌ | 31/36 [00:01<00:00, 19.11it/s]
Capturing batches (bs=24 a
|
||
|
|
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:01<00:00, 19.26it/s]
|
||
|
|
Capturing batches (bs=1 avail_mem=12.59 GB): 94%|█████████▍| 34/36 [00:01<00:00, 19.98it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 94%|█████████▍| 34/36 [00:01<00:00, 20.28it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 78%|███████▊ | 28/36 [00:01<00:00, 20.33it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:01<00:00, 19.38it/s]
|
||
|
|
Capturing batches (bs=16 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 19.00it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 19.00it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:01<00:00, 19.71it/s]
|
||
|
|
Capturing batches (bs=16 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.85it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.85it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 86%|████████▌ | 31/36 [00:01<00:00, 19.00it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.85it/s]
Capturing batches (bs=16 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.84it/s]
Capturing batches (bs=12 avail_mem=12.62 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.84it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 86%|████████▌ | 31/36 [00:01<00:00, 19.00it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.85it/s]
Capturing batches (bs=8 avail_mem=12.61 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.84it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 94%|█████████▍| 34/36 [00:01<00:00, 20.19it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 94%|█████████▍| 34/36 [00:01<00:00, 20.19it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 94%|█████████▍| 34/36 [00:01<00:00, 20.02it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 94%|█████████▍| 34/36 [00:01<00:00, 20.02it/s]
Capturing batches (bs=4 avail_mem=12.60 GB): 86%|████████▌ | 31/36 [00:01<00:00, 18.84it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 94%|█████████▍| 34/36 [00:01<00:00, 20.19it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 94%|█████████▍| 34/36 [00:01<00:00, 20.02it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:01<00:00, 19.49it/s]
|
||
|
|
Capturing batches (bs=4 avail_mem=12.60 GB): 94%|█████████▍| 34/36 [00:01<00:00, 20.00it/s]
Capturing batches (bs=2 avail_mem=12.59 GB): 94%|█████████▍| 34/36 [00:01<00:00, 20.00it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:01<00:00, 19.47it/s]
|
||
|
|
Capturing batches (bs=1 avail_mem=12.59 GB): 94%|█████████▍| 34/36 [00:01<00:00, 20.00it/s]
Capturing batches (bs=1 avail_mem=12.59 GB): 100%|██████████| 36/36 [00:01<00:00, 19.40it/s]
|
||
|
|
Inference: 0%| | 0/5920 [00:00<?, ?it/s][2026-04-17 05:18:43] Worker Error: Requested token count exceeds the model's maximum context length of 32768 tokens. You requested a total of 32806 tokens: 2806 tokens from the input messages and 30000 tokens for the completion. Please reduce the number of tokens in the input messages or the completion to fit within the limit.
|
||
|
|
[2026-04-17 05:18:43] Worker Error: Requested token count exceeds the model's maximum context length of 32768 tokens. You requested a total of 32806 tokens: 2806 tokens from the input messages and 30000 tokens for the completion. Please reduce the number of tokens in the input messages or the completion to fit within the limit.
|
||
|
|
[2026-04-17 05:18:43] Worker Error: Requested token count exceeds the model's maximum context length of 32768 tokens. You requested a total of 32806 tokens: 2806 tokens from the input messages and 30000 tokens for the completion. Please reduce the number of tokens in the input messages or the completion to fit within the limit.
|
||
|
|
[2026-04-17 05:18:43] Worker Error: Requested token count exceeds the model's maximum context length of 32768 tokens. You requested a total of 32806 tokens: 2806 tokens from the input messages and 30000 tokens for the completion. Please reduce the number of tokens in the input messages or the completion to fit within the limit.
|
||
|
|
Inference: 0%| | 1/5920 [00:08<13:48:02, 8.39s/it]
Inference: 0%| | 1/5920 [00:08<13:48:02, 8.39s/it, tok/s=125.1]
Inference: 0%| | 2/5920 [00:08<13:47:53, 8.39s/it, tok/s=250.0]
Inference: 0%| | 3/5920 [00:08<13:47:45, 8.39s/it, tok/s=374.6]
Inference: 0%| | 4/5920 [00:08<13:47:36, 8.39s/it, tok/s=495.7]
Inference: 0%| | 5/5920 [00:10<2:45:47, 1.68s/it, tok/s=495.7]
Inference: 0%| | 5/5920 [00:10<2:45:47, 1.68s/it, tok/s=527.7]
Inference: 0%| | 6/5920 [00:12<2:51:26, 1.74s/it, tok/s=527.7]
Inference: 0%| | 6/5920 [00:12<2:51:26, 1.74s/it, tok/s=575.3]
Inference: 0%| | 7/5920 [00:13<2:25:31, 1.48s/it, tok/s=575.3]
Inference: 0%| | 7/5920 [00:13<2:25:31, 1.48s/it, tok/s=672.9]
Inference: 0%| | 8/5920 [00:13<1:59:14, 1.21s/it, tok/s=672.9]
Inference: 0%| | 8/5920 [00:13<1:59:14, 1.21s/it, tok/s=777.3]
Inference: 0%| | 9/5920 [00:14<2:02:26, 1.24s/it, tok/s=777.3]
Inference: 0%| | 9/5920 [00:14<2:02:26, 1.24s/it, tok/s=825.0]
Inference: 0%| | 10/5920 [00:15<1:42:16, 1.04s/it, tok/s=825.0]
Inference: 0%| | 10/5920 [00:15<1:42:16, 1.04s/it, tok/s=914.1]
Inference: 0%| | 11/5920 [00:16<1:32:07, 1.07it/s, tok/s=914.1]
Inference: 0%| | 11/5920 [00:16<1:32:07, 1.07it/s, tok/s=985.8]
Inference: 0%| | 12/5920 [00:16<1:32:06, 1.07it/s, tok/s=1101.2]
Inference: 0%| | 13/5920 [00:16<1:06:36, 1.48it/s, tok/s=1101.2]
Inference: 0%| | 13/5920 [00:16<1:06:36, 1.48it/s, tok/s=1163.6]
Inference: 0%| | 14/5920 [00:17<56:33, 1.74it/s, tok/s=1163.6]
Inference: 0%| | 14/5920 [00:17<56:33, 1.74it/s, tok/s=1254.9]
Inference: 0%| | 15/5920 [00:19<1:40:37, 1.02s/it, tok/s=1254.9]
Inference: 0%| | 15/5920 [00:19<1:40:37, 1.02s/it, tok/s=1213.1]
Inference: 0%| | 16/5920 [00:19<1:18:40, 1.25it/s, tok/s=1213.1]
Inference: 0%| | 16/5920 [00:19<1:18:40, 1.25it/s, tok/s=1314.5]
Inference: 0%| | 17/5920 [00:19<1:00:19, 1.63it/s, tok/s=1314.5]
Inference: 0%| | 17/5920 [00:19<1:00:19, 1.63it/s, tok/s=1414.9]
Inference: 0%| | 18/5920 [00:19<48:46, 2.02it/s, tok/s=1414.9]
Inference: 0%| | 18/5920 [00:19<48:46, 2.02it/s, tok/s=1519.6]
Inference: 0%| | 19/5920 [00:20<46:56, 2.10it/s, tok/s=1519.6]
Inference: 0%| | 19/5920 [00:20<46:56, 2.10it/s, tok/s=1600.2]
Inference: 0%| | 20/5920 [00:20<40:48, 2.41it/s, tok/s=1600.2]
Inference: 0%| | 20/5920 [00:20<40:48, 2.41it/s, tok/s=1691.9]
Inference: 0%| | 21/5920 [00:20<35:06, 2.80it/s, tok/s=1691.9]
Inference: 0%| | 21/5920 [00:20<35:06, 2.80it/s, tok/s=1790.6]
Inference: 0%| | 22/5920 [00:21<38:31, 2.55it/s, tok/s=1790.6]
Inference: 0%| | 22/5920 [00:21<38:31, 2.55it/s, tok/s=1867.3]
Inference: 0%| | 23/5920 [00:21<31:37, 3.11it/s, tok/s=1867.3]
Inference: 0%| | 23/5920 [00:21<31:37, 3.11it/s, tok/s=1958.8]
Inference: 0%| | 24/5920 [00:22<46:35, 2.11it/s, tok/s=1958.8]
Inference: 0%| | 24/5920 [00:22<46:35, 2.11it/s, tok/s=1990.2]
Inference: 0%| | 25/5920 [00:22<40:22, 2.43it/s, tok/s=1990.2]
Inference: 0%| | 25/5920 [00:22<40:22, 2.43it/s, tok/s=2072.8]
Inference: 0%| | 26/5920 [00:25<1:52:15, 1.14s/it, tok/s=2072.8]
Inference: 0%| | 26/5920 [00:25<1:52:15, 1.14s/it, tok/s=1946.8]
Inference: 0%| | 27/5920 [00:27<2:16:27, 1.39s/it, tok/s=1946.8]
Inference: 0%| | 27/5920 [00:27<2:16:27, 1.39s/it, tok/s=1903.8]
Inference: 0%| | 28/5920 [00:27<1:45:39, 1.08s/it, tok/s=1903.8]
Inference: 0%| | 28/5920 [00:27<1:45:39, 1.08s/it, tok/s=1984.0]
Inference: 0%| | 29/5920 [00:27<1:45:37, 1.08s/it, tok/s=2085.2]
Inference: 1%| | 30/5920 [00:28<1:15:58, 1.29it/s, tok/s=2085.2]
Inference
|
||
|
|
{"stage": "all", "step01": {"task_count": 6, "instance_count": 5920, "pass_k_by_task": {"gpqa_diamond": 4, "hmmt2025": 4, "aime2024": 32, "aime2025": 32, "math500": 4, "minerva": 4}, "task_sizes": {"gpqa_diamond": 792, "hmmt2025": 120, "aime2024": 960, "aime2025": 960, "math500": 2000, "minerva": 1088}, "output_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-muon-20260413_090005/iter_0001023-hf/eval_20260417051759/step01_prepared.jsonl"}, "step02": {"backend": "offline", "input_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-muon-20260413_090005/iter_0001023-hf/eval_20260417051759/step01_prepared.jsonl", "output_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-muon-20260413_090005/iter_0001023-hf/eval_20260417051759/step02_inference.jsonl"}, "step03": {"input_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-muon-20260413_090005/iter_0001023-hf/eval_20260417051759/step02_inference.jsonl", "score_output_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-muon-20260413_090005/iter_0001023-hf/eval_20260417051759/step03_score.jsonl", "final_eval_output_path": "/jpfs-5p/chenyanxu.9/model/Qwen3-8B-onpolicy-profiling-muon-20260413_090005/iter_0001023-hf/eval_20260417051759/step03_final_eval.jsonl", "metrics": {"gpqa_diamond": {"avg_k": 0.5774111675126904, "pass_k": 0.766497461928934, "avg_total_tokens": 10612.370558375635, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "hmmt2025": {"avg_k": 0.31666666666666665, "pass_k": 0.43333333333333335, "avg_total_tokens": 18237.566666666666, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "aime2024": {"avg_k": 0.6802083333333333, "pass_k": 0.9, "avg_total_tokens": 14426.277083333332, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "aime2025": {"avg_k": 0.571875, "pass_k": 0.8666666666666667, "avg_total_tokens": 15452.192708333334, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "math500": {"avg_k": 0.7485, "pass_k": 0.768, "avg_total_tokens": 4490.75, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "minerva": {"avg_k": 0.3290441176470588, "pass_k": 0.38235294117647056, "avg_total_tokens": 6507.237132352941, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}, "overall": {"avg_k": 0.6000676132521975, "pass_k": 0.6657223796033994, "avg_total_tokens": 9346.815584854632, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}}}}
|
||
|
|
Traceback (most recent call last):
|
||
|
|
File "/usr/lib/python3.12/multiprocessing/resource_tracker.py", line 239, in main
|
||
|
|
cache[rtype].remove(name)
|
||
|
|
KeyError: '/mp-3hp1opqx'
|