5503 lines
940 KiB
Plaintext
5503 lines
940 KiB
Plaintext
[2026-04-24 00:49:08,013] [DEBUG] [axolotl.utils.config.resolve_dtype:74] [PID:248539] bf16 support detected, enabling for this configuration.
|
||
[2026-04-24 00:49:08,023] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:248539] baseline 0.000GB ()
|
||
[2026-04-24 00:49:08,023] [INFO] [axolotl.cli.config.load_cfg:341] [PID:248539] config:
|
||
{
|
||
"activation_offloading": true,
|
||
"adam_beta1": 0.9,
|
||
"adam_beta2": 0.95,
|
||
"axolotl_config_path": "/e/scratch/jureap59/feuer1/code/axolotl_configs/qwen3_8b_sera_v4_316.yaml",
|
||
"base_model": "Qwen/Qwen3-8B",
|
||
"base_model_config": "Qwen/Qwen3-8B",
|
||
"batch_size": 32,
|
||
"bf16": true,
|
||
"capabilities": {
|
||
"bf16": true,
|
||
"compute_capability": "sm_90",
|
||
"fp8": true,
|
||
"n_gpu": 4,
|
||
"n_node": 1,
|
||
"tf32": true
|
||
},
|
||
"chat_template": "tokenizer_default",
|
||
"context_parallel_size": 1,
|
||
"dataloader_num_workers": 4,
|
||
"dataloader_pin_memory": true,
|
||
"dataloader_prefetch_factor": 256,
|
||
"dataset_num_proc": 288,
|
||
"dataset_prepared_path": "/e/data1/datasets/playground/ot-baf/axolotl_dataset_cache/sera-v4-316",
|
||
"datasets": [
|
||
{
|
||
"chat_template": "tokenizer_default",
|
||
"ds_type": "json",
|
||
"field_messages": "messages",
|
||
"message_field_training": "train",
|
||
"message_property_mappings": {
|
||
"content": "content",
|
||
"role": "role"
|
||
},
|
||
"path": "/e/data1/datasets/playground/ot-baf/hf_hub/datasets--laion--Sera-4.6-Lite-T2-v4-316/snapshots/2df0e5321c676c5010ba43d4be6b74fb13dfe8a4/sera-4.6-lite-t2_v4_316.jsonl",
|
||
"trust_remote_code": false,
|
||
"type": "chat_template"
|
||
}
|
||
],
|
||
"ddp": true,
|
||
"deepspeed": {
|
||
"bf16": {
|
||
"enabled": true
|
||
},
|
||
"gradient_accumulation_steps": "auto",
|
||
"gradient_clipping": "auto",
|
||
"train_batch_size": "auto",
|
||
"train_micro_batch_size_per_gpu": "auto",
|
||
"wall_clock_breakdown": false,
|
||
"zero_optimization": {
|
||
"contiguous_gradients": true,
|
||
"gather_16bit_weights_on_model_save": true,
|
||
"max_live_parameters": 0,
|
||
"max_reuse_distance": 0,
|
||
"overlap_comm": true,
|
||
"reduce_bucket_size": "auto",
|
||
"stage": 3,
|
||
"stage3_param_persistence_threshold": "auto",
|
||
"stage3_prefetch_bucket_size": "auto",
|
||
"sub_group_size": 0
|
||
}
|
||
},
|
||
"device": "cuda:0",
|
||
"device_map": {
|
||
"": 0
|
||
},
|
||
"dion_rank_fraction": 1.0,
|
||
"dion_rank_multiple_of": 1,
|
||
"eaft_alpha": 1.0,
|
||
"eaft_k": 20,
|
||
"env_capabilities": {
|
||
"torch_version": "2.9.1"
|
||
},
|
||
"eval_batch_size": 1,
|
||
"eval_causal_lm_metrics": [
|
||
"sacrebleu",
|
||
"comet",
|
||
"ter",
|
||
"chrf"
|
||
],
|
||
"eval_max_new_tokens": 128,
|
||
"eval_table_size": 0,
|
||
"evals_per_epoch": 0,
|
||
"experimental_skip_move_to_device": true,
|
||
"flash_attention": true,
|
||
"fp16": false,
|
||
"generate_samples": false,
|
||
"generation_do_sample": true,
|
||
"generation_max_new_tokens": 50,
|
||
"generation_prompt_ratio": 0.5,
|
||
"generation_temperature": 0.7,
|
||
"gradient_accumulation_steps": 8,
|
||
"gradient_checkpointing": true,
|
||
"gradient_checkpointing_kwargs": {
|
||
"use_reentrant": true
|
||
},
|
||
"include_tkps": true,
|
||
"layer_offloading": false,
|
||
"learning_rate": 1e-05,
|
||
"lisa_layers_attribute": "model.layers",
|
||
"load_best_model_at_end": false,
|
||
"load_in_4bit": false,
|
||
"load_in_8bit": false,
|
||
"local_rank": 0,
|
||
"logging_steps": 1,
|
||
"lora_dropout": 0.0,
|
||
"loraplus_lr_embedding": 1e-06,
|
||
"loss_watchdog_patience": 3,
|
||
"loss_watchdog_threshold": 5.0,
|
||
"lr_scheduler": "cosine",
|
||
"max_grad_norm": 1.0,
|
||
"mean_resizing_embeddings": false,
|
||
"merge_method": "memory_efficient",
|
||
"micro_batch_size": 1,
|
||
"model_config_type": "qwen3",
|
||
"num_epochs": 3.0,
|
||
"num_generation_samples": 3,
|
||
"optimizer": "adamw_torch",
|
||
"otel_metrics_host": "localhost",
|
||
"otel_metrics_port": 8000,
|
||
"output_dir": "/e/data1/datasets/playground/ot-baf/checkpoints/sera-v4-316-axolotl__Qwen3-8B",
|
||
"pretrain_multipack_attn": true,
|
||
"profiler_steps_start": 0,
|
||
"qlora_sharded_model_loading": false,
|
||
"quantize_moe_experts": false,
|
||
"ray_num_workers": 1,
|
||
"resources_per_worker": {
|
||
"GPU": 1
|
||
},
|
||
"sample_packing_bin_size": 200,
|
||
"sample_packing_group_size": 100000,
|
||
"save_only_model": false,
|
||
"save_safetensors": true,
|
||
"save_strategy": "epoch",
|
||
"sequence_len": 32768,
|
||
"shuffle_before_merging_datasets": false,
|
||
"shuffle_merged_datasets": true,
|
||
"skip_prepare_dataset": false,
|
||
"streaming_multipack_buffer_size": 10000,
|
||
"strict": false,
|
||
"tensor_parallel_size": 1,
|
||
"tf32": false,
|
||
"tiled_mlp_use_original_mlp": true,
|
||
"tokenizer_config": "Qwen/Qwen3-8B",
|
||
"tokenizer_save_jinja_files": true,
|
||
"torch_dtype": "torch.bfloat16",
|
||
"train_on_inputs": false,
|
||
"trl": {
|
||
"async_prefetch": false,
|
||
"log_completions": false,
|
||
"mask_truncated_completions": false,
|
||
"ref_model_mixup_alpha": 0.9,
|
||
"ref_model_sync_steps": 64,
|
||
"replay_buffer_size": 0,
|
||
"replay_recompute_logps": true,
|
||
"reroll_max_groups": 1,
|
||
"reroll_start_fraction": 1.0,
|
||
"reward_num_workers": 1,
|
||
"scale_rewards": true,
|
||
"skip_zero_advantage_batches": true,
|
||
"sync_ref_model": false,
|
||
"use_data_producer": false,
|
||
"use_vllm": false,
|
||
"vllm_lora_sync": false,
|
||
"vllm_server_host": "0.0.0.0",
|
||
"vllm_server_port": 8000
|
||
},
|
||
"use_otel_metrics": false,
|
||
"use_ray": false,
|
||
"val_set_size": 0.0,
|
||
"vllm": {
|
||
"device": "auto",
|
||
"dtype": "auto",
|
||
"gpu_memory_utilization": 0.9,
|
||
"host": "0.0.0.0",
|
||
"port": 8000
|
||
},
|
||
"wandb_name": "sera-v4-316-axolotl__Qwen3-8B",
|
||
"warmup_ratio": 0.1875,
|
||
"weight_decay": 0.01,
|
||
"world_size": 4
|
||
}
|
||
[2026-04-24 00:49:08,039] [INFO] [axolotl.cli.checks.check_user_token:37] [PID:248539] Skipping HuggingFace token verification because HF_HUB_OFFLINE is set to True. Only local files will be used.
|
||
[2026-04-24 00:49:08,512] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:307] [PID:248539] EOS: 151645 / <|im_end|>
|
||
[2026-04-24 00:49:08,513] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:308] [PID:248539] BOS: None / None
|
||
[2026-04-24 00:49:08,513] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:309] [PID:248539] PAD: 151643 / <|endoftext|>
|
||
[2026-04-24 00:49:08,513] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:310] [PID:248539] UNK: None / None
|
||
[2026-04-24 00:49:08,543] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:480] [PID:248539] Unable to find prepared dataset in /e/data1/datasets/playground/ot-baf/axolotl_dataset_cache/sera-v4-316/6a9b456bfb3853b790bb0bd3a8fbe97e
|
||
[2026-04-24 00:49:08,543] [INFO] [axolotl.utils.data.sft._load_raw_datasets:320] [PID:248539] Loading raw datasets...
|
||
[2026-04-24 00:49:08,543] [WARNING] [axolotl.utils.data.sft._load_raw_datasets:322] [PID:248539] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset using `axolotl preprocess path/to/config.yml`.
|
||
|
||
Generating train split: 0 examples [00:00, ? examples/s]
|
||
Generating train split: 32 examples [00:00, 283.23 examples/s]
|
||
Generating train split: 75 examples [00:00, 274.78 examples/s]
|
||
Generating train split: 316 examples [00:00, 863.83 examples/s]
|
||
[2026-04-24 00:49:09,109] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:248539] Loading dataset: /e/data1/datasets/playground/ot-baf/hf_hub/datasets--laion--Sera-4.6-Lite-T2-v4-316/snapshots/2df0e5321c676c5010ba43d4be6b74fb13dfe8a4/sera-4.6-lite-t2_v4_316.jsonl with base_type: chat_template and prompt_style: None
|
||
[2026-04-24 00:49:09,155] [INFO] [axolotl.prompt_strategies.chat_template.__call__:998] [PID:248539] Using chat template:
|
||
---
|
||
{%- if tools %}
|
||
{{- '<|im_start|>system\n' }}
|
||
{%- if messages[0].role == 'system' %}
|
||
{{- messages[0].content + '\n\n' }}
|
||
{%- endif %}
|
||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||
{%- for tool in tools %}
|
||
{{- "\n" }}
|
||
{{- tool | tojson }}
|
||
{%- endfor %}
|
||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||
{%- else %}
|
||
{%- if messages[0].role == 'system' %}
|
||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||
{%- endif %}
|
||
{%- endif %}
|
||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||
{%- for message in messages[::-1] %}
|
||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||
{%- set ns.multi_step_tool = false %}
|
||
{%- set ns.last_query_index = index %}
|
||
{%- endif %}
|
||
{%- endfor %}
|
||
{%- for message in messages %}
|
||
{%- if message.content is string %}
|
||
{%- set content = message.content %}
|
||
{%- else %}
|
||
{%- set content = '' %}
|
||
{%- endif %}
|
||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||
{%- elif message.role == "assistant" %}
|
||
{%- set reasoning_content = '' %}
|
||
{%- if message.reasoning_content is string %}
|
||
{%- set reasoning_content = message.reasoning_content %}
|
||
{%- else %}
|
||
{%- if '</think>' in content %}
|
||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||
{%- endif %}
|
||
{%- endif %}
|
||
{%- if loop.index0 > ns.last_query_index %}
|
||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||
{%- else %}
|
||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||
{%- endif %}
|
||
{%- else %}
|
||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||
{%- endif %}
|
||
{%- if message.tool_calls %}
|
||
{%- for tool_call in message.tool_calls %}
|
||
{%- if (loop.first and content) or (not loop.first) %}
|
||
{{- '\n' }}
|
||
{%- endif %}
|
||
{%- if tool_call.function %}
|
||
{%- set tool_call = tool_call.function %}
|
||
{%- endif %}
|
||
{{- '<tool_call>\n{"name": "' }}
|
||
{{- tool_call.name }}
|
||
{{- '", "arguments": ' }}
|
||
{%- if tool_call.arguments is string %}
|
||
{{- tool_call.arguments }}
|
||
{%- else %}
|
||
{{- tool_call.arguments | tojson }}
|
||
{%- endif %}
|
||
{{- '}\n</tool_call>' }}
|
||
{%- endfor %}
|
||
{%- endif %}
|
||
{{- '<|im_end|>\n' }}
|
||
{%- elif message.role == "tool" %}
|
||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||
{{- '<|im_start|>user' }}
|
||
{%- endif %}
|
||
{{- '\n<tool_response>\n' }}
|
||
{{- content }}
|
||
{{- '\n</tool_response>' }}
|
||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||
{{- '<|im_end|>\n' }}
|
||
{%- endif %}
|
||
{%- endif %}
|
||
{%- endfor %}
|
||
{%- if add_generation_prompt %}
|
||
{{- '<|im_start|>assistant\n' }}
|
||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||
{{- '<think>\n\n</think>\n\n' }}
|
||
{%- endif %}
|
||
{%- endif %}
|
||
---
|
||
|
||
Tokenizing Prompts (num_proc=288): 0%| | 0/316 [00:00<?, ? examples/s][2026-04-24 00:49:24,862] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248773] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:25,398] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248772] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:25,921] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248774] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:26,989] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248773] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 1%| | 2/316 [00:07<20:02, 3.83s/ examples][2026-04-24 00:49:27,158] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248774] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 1%|▏ | 4/316 [00:07<08:27, 1.63s/ examples][2026-04-24 00:49:27,238] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248775] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:27,812] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248776] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:28,463] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248775] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 2%|▏ | 6/316 [00:09<06:06, 1.18s/ examples][2026-04-24 00:49:28,508] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248772] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:29,068] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248777] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:29,137] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248776] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 3%|▎ | 10/316 [00:09<03:06, 1.64 examples/s][2026-04-24 00:49:29,629] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248777] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 4%|▍ | 12/316 [00:10<02:33, 1.98 examples/s][2026-04-24 00:49:31,127] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248779] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:32,422] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248778] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:32,988] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248779] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 4%|▍ | 14/316 [00:13<04:15, 1.18 examples/s][2026-04-24 00:49:33,844] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248781] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:34,505] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248780] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:34,704] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248778] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 5%|▌ | 16/316 [00:15<04:15, 1.18 examples/s][2026-04-24 00:49:35,124] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248781] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 6%|▌ | 18/316 [00:15<03:18, 1.50 examples/s][2026-04-24 00:49:35,485] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248780] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 6%|▋ | 20/316 [00:16<02:32, 1.94 examples/s][2026-04-24 00:49:35,640] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248782] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:36,287] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248784] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:36,632] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248783] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:37,043] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248784] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 7%|▋ | 22/316 [00:17<02:56, 1.66 examples/s][2026-04-24 00:49:37,884] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248785] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:38,562] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248783] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 8%|▊ | 24/316 [00:19<03:07, 1.56 examples/s][2026-04-24 00:49:38,652] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248786] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:39,176] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248785] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 8%|▊ | 26/316 [00:19<02:36, 1.85 examples/s][2026-04-24 00:49:40,451] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248788] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:40,961] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248782] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 9%|▉ | 28/316 [00:21<03:06, 1.54 examples/s][2026-04-24 00:49:41,111] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248789] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:41,454] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248790] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:41,816] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248789] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 9%|▉ | 30/316 [00:22<02:45, 1.73 examples/s][2026-04-24 00:49:42,204] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248788] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 10%|█ | 32/316 [00:22<02:12, 2.15 examples/s][2026-04-24 00:49:42,397] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248790] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 11%|█ | 34/316 [00:23<01:40, 2.82 examples/s][2026-04-24 00:49:43,273] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248792] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:44,428] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248791] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:45,276] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248793] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:45,586] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248791] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 11%|█▏ | 36/316 [00:26<03:23, 1.38 examples/s][2026-04-24 00:49:45,809] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248795] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:46,061] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248792] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 12%|█▏ | 38/316 [00:26<02:41, 1.72 examples/s][2026-04-24 00:49:46,461] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248793] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 13%|█▎ | 40/316 [00:27<02:08, 2.15 examples/s][2026-04-24 00:49:48,083] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248797] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:48,111] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248795] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 13%|█▎ | 42/316 [00:28<02:36, 1.75 examples/s][2026-04-24 00:49:49,036] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248794] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:50,012] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248798] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:50,362] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248797] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 14%|█▍ | 44/316 [00:31<03:21, 1.35 examples/s][2026-04-24 00:49:50,474] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248794] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 15%|█▍ | 46/316 [00:31<02:24, 1.87 examples/s][2026-04-24 00:49:51,499] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248798] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 15%|█▌ | 48/316 [00:32<02:21, 1.89 examples/s][2026-04-24 00:49:52,445] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248796] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:53,239] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248801] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 16%|█▌ | 49/316 [00:33<03:18, 1.35 examples/s][2026-04-24 00:49:53,556] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248796] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 16%|█▌ | 51/316 [00:34<02:24, 1.83 examples/s][2026-04-24 00:49:53,600] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248800] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:53,829] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248787] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:55,924] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248803] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 17%|█▋ | 53/316 [00:36<03:20, 1.31 examples/s][2026-04-24 00:49:56,351] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248786] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 17%|█▋ | 55/316 [00:37<02:31, 1.73 examples/s][2026-04-24 00:49:56,402] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248799] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:56,557] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248804] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 18%|█▊ | 56/316 [00:37<02:12, 1.96 examples/s][2026-04-24 00:49:56,635] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248806] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:58,255] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248802] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 18%|█▊ | 58/316 [00:38<02:41, 1.59 examples/s][2026-04-24 00:49:58,565] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248805] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 19%|█▊ | 59/316 [00:39<02:25, 1.77 examples/s][2026-04-24 00:49:59,008] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248807] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:49:59,041] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248808] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 19%|█▉ | 60/316 [00:39<02:17, 1.86 examples/s][2026-04-24 00:49:59,221] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248799] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 20%|█▉ | 63/316 [00:39<01:19, 3.19 examples/s][2026-04-24 00:50:00,046] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248787] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 21%|██ | 65/316 [00:40<01:26, 2.91 examples/s][2026-04-24 00:50:01,904] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248810] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 21%|██ | 66/316 [00:42<02:35, 1.61 examples/s][2026-04-24 00:50:02,398] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248812] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 21%|██ | 67/316 [00:43<02:27, 1.69 examples/s][2026-04-24 00:50:03,522] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248809] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:50:03,586] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248811] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 22%|██▏ | 68/316 [00:44<03:01, 1.36 examples/s][2026-04-24 00:50:05,123] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248813] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 22%|██▏ | 70/316 [00:45<03:03, 1.34 examples/s][2026-04-24 00:50:05,282] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248815] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 22%|██▏ | 71/316 [00:45<02:30, 1.62 examples/s][2026-04-24 00:50:05,321] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248814] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:50:07,347] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248817] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 23%|██▎ | 73/316 [00:48<03:08, 1.29 examples/s][2026-04-24 00:50:07,723] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248816] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 23%|██▎ | 74/316 [00:48<02:47, 1.45 examples/s][2026-04-24 00:50:12,430] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248818] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 24%|██▎ | 75/316 [00:53<06:35, 1.64s/ examples][2026-04-24 00:50:12,782] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248822] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 24%|██▍ | 76/316 [00:53<05:15, 1.31s/ examples][2026-04-24 00:50:13,168] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248823] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 24%|██▍ | 77/316 [00:53<04:14, 1.07s/ examples][2026-04-24 00:50:13,918] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248821] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 25%|██▍ | 78/316 [00:54<03:55, 1.01 examples/s][2026-04-24 00:50:14,713] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248825] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 25%|██▌ | 79/316 [00:55<03:39, 1.08 examples/s][2026-04-24 00:50:15,043] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248824] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 25%|██▌ | 80/316 [00:55<02:58, 1.33 examples/s][2026-04-24 00:50:16,920] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248826] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 26%|██▌ | 81/316 [00:57<04:14, 1.08s/ examples][2026-04-24 00:50:17,143] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248827] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 26%|██▌ | 82/316 [00:57<03:13, 1.21 examples/s][2026-04-24 00:50:18,440] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248828] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 26%|██▋ | 83/316 [00:59<03:45, 1.03 examples/s][2026-04-24 00:50:18,692] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248819] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 27%|██▋ | 84/316 [00:59<02:55, 1.32 examples/s][2026-04-24 00:50:19,179] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248820] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 27%|██▋ | 85/316 [00:59<02:36, 1.48 examples/s][2026-04-24 00:50:19,589] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248829] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 27%|██▋ | 86/316 [01:00<02:19, 1.65 examples/s][2026-04-24 00:50:19,690] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248830] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:50:21,504] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248831] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 28%|██▊ | 88/316 [01:02<02:53, 1.32 examples/s][2026-04-24 00:50:23,016] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248832] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 28%|██▊ | 89/316 [01:03<03:34, 1.06 examples/s][2026-04-24 00:50:23,198] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248833] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 28%|██▊ | 90/316 [01:03<02:48, 1.34 examples/s][2026-04-24 00:50:23,468] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248834] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 29%|██▉ | 91/316 [01:04<02:19, 1.62 examples/s][2026-04-24 00:50:25,648] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248835] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 29%|██▉ | 92/316 [01:06<03:56, 1.05s/ examples][2026-04-24 00:50:26,584] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248837] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 29%|██▉ | 93/316 [01:07<03:47, 1.02s/ examples][2026-04-24 00:50:26,738] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248836] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 30%|██▉ | 94/316 [01:07<02:51, 1.30 examples/s][2026-04-24 00:50:27,115] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248838] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 30%|███ | 95/316 [01:07<02:24, 1.53 examples/s][2026-04-24 00:50:28,505] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248839] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 30%|███ | 96/316 [01:09<03:11, 1.15 examples/s][2026-04-24 00:50:29,432] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248840] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 31%|███ | 97/316 [01:10<03:14, 1.13 examples/s][2026-04-24 00:50:30,855] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248841] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 31%|███ | 98/316 [01:11<03:48, 1.05s/ examples][2026-04-24 00:50:31,621] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248842] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 31%|███▏ | 99/316 [01:12<03:29, 1.03 examples/s][2026-04-24 00:50:34,180] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248843] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 32%|███▏ | 100/316 [01:14<05:10, 1.44s/ examples][2026-04-24 00:50:34,382] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248844] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 32%|███▏ | 101/316 [01:15<03:52, 1.08s/ examples][2026-04-24 00:50:35,061] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248846] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 32%|███▏ | 102/316 [01:15<03:22, 1.06 examples/s][2026-04-24 00:50:35,862] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248845] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 33%|███▎ | 103/316 [01:16<03:12, 1.10 examples/s][2026-04-24 00:50:39,282] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248849] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 33%|███▎ | 104/316 [01:19<05:51, 1.66s/ examples][2026-04-24 00:50:40,955] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248853] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 33%|███▎ | 105/316 [01:21<05:50, 1.66s/ examples][2026-04-24 00:50:41,296] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248852] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 34%|███▎ | 106/316 [01:21<04:25, 1.27s/ examples][2026-04-24 00:50:41,354] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248850] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:50:41,375] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248848] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 34%|███▍ | 108/316 [01:22<02:26, 1.42 examples/s][2026-04-24 00:50:42,451] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248855] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 34%|███▍ | 109/316 [01:23<02:43, 1.27 examples/s][2026-04-24 00:50:42,636] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248851] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 35%|███▍ | 110/316 [01:23<02:10, 1.58 examples/s][2026-04-24 00:50:45,430] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248856] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 35%|███▌ | 111/316 [01:26<04:10, 1.22s/ examples][2026-04-24 00:50:45,487] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248854] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:50:46,018] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248857] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 36%|███▌ | 113/316 [01:26<02:45, 1.23 examples/s][2026-04-24 00:50:48,358] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248860] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 36%|███▌ | 114/316 [01:29<03:57, 1.18s/ examples][2026-04-24 00:50:48,531] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248859] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:50:48,558] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248858] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 36%|███▋ | 115/316 [01:29<03:06, 1.08 examples/s][2026-04-24 00:50:49,717] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248861] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 37%|███▋ | 117/316 [01:30<02:37, 1.26 examples/s][2026-04-24 00:50:50,315] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248847] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 37%|███▋ | 118/316 [01:30<02:26, 1.35 examples/s][2026-04-24 00:50:50,914] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248863] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 38%|███▊ | 119/316 [01:31<02:18, 1.43 examples/s][2026-04-24 00:50:52,423] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248865] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 38%|███▊ | 120/316 [01:33<02:58, 1.10 examples/s][2026-04-24 00:50:53,707] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248864] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 38%|███▊ | 121/316 [01:34<03:18, 1.02s/ examples][2026-04-24 00:50:55,117] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248867] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 39%|███▊ | 122/316 [01:35<03:38, 1.12s/ examples][2026-04-24 00:50:55,917] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248868] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 39%|███▉ | 123/316 [01:36<03:18, 1.03s/ examples][2026-04-24 00:50:56,549] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248866] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 39%|███▉ | 124/316 [01:37<02:56, 1.09 examples/s][2026-04-24 00:50:56,946] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248869] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 40%|███▉ | 125/316 [01:37<02:25, 1.31 examples/s][2026-04-24 00:50:57,789] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248870] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 40%|███▉ | 126/316 [01:38<02:29, 1.27 examples/s][2026-04-24 00:51:00,140] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248872] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:51:00,186] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248862] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:51:00,208] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248871] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 40%|████ | 127/316 [01:40<04:01, 1.28s/ examples][2026-04-24 00:51:01,851] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248874] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 41%|████ | 130/316 [01:42<02:41, 1.16 examples/s][2026-04-24 00:51:02,813] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248875] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 41%|████▏ | 131/316 [01:43<02:43, 1.13 examples/s][2026-04-24 00:51:03,014] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248876] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 42%|████▏ | 132/316 [01:43<02:13, 1.38 examples/s][2026-04-24 00:51:05,479] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248877] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 42%|████▏ | 133/316 [01:46<03:32, 1.16s/ examples][2026-04-24 00:51:06,569] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248878] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 42%|████▏ | 134/316 [01:47<03:27, 1.14s/ examples][2026-04-24 00:51:07,706] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248880] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 43%|████▎ | 135/316 [01:48<03:27, 1.15s/ examples][2026-04-24 00:51:08,174] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248881] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 43%|████▎ | 136/316 [01:48<02:51, 1.05 examples/s][2026-04-24 00:51:08,271] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248879] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 43%|████▎ | 137/316 [01:48<02:06, 1.41 examples/s][2026-04-24 00:51:09,095] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248882] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 44%|████▎ | 138/316 [01:49<02:11, 1.35 examples/s][2026-04-24 00:51:11,078] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248883] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 44%|████▍ | 139/316 [01:51<03:15, 1.10s/ examples][2026-04-24 00:51:12,456] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248884] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 44%|████▍ | 140/316 [01:53<03:31, 1.20s/ examples][2026-04-24 00:51:12,602] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248885] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:51:13,470] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248886] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 45%|████▍ | 142/316 [01:54<02:31, 1.15 examples/s][2026-04-24 00:51:14,785] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248873] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 45%|████▌ | 143/316 [01:55<02:49, 1.02 examples/s][2026-04-24 00:51:15,278] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248888] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 46%|████▌ | 144/316 [01:55<02:26, 1.17 examples/s][2026-04-24 00:51:16,038] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248889] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 46%|████▌ | 145/316 [01:56<02:21, 1.21 examples/s][2026-04-24 00:51:16,414] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248890] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 46%|████▌ | 146/316 [01:57<01:58, 1.43 examples/s][2026-04-24 00:51:17,063] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248887] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 47%|████▋ | 147/316 [01:57<01:56, 1.45 examples/s][2026-04-24 00:51:19,375] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248891] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 47%|████▋ | 148/316 [02:00<03:14, 1.16s/ examples][2026-04-24 00:51:20,079] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248894] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 47%|████▋ | 149/316 [02:00<02:50, 1.02s/ examples][2026-04-24 00:51:20,344] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248893] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 47%|████▋ | 150/316 [02:01<02:13, 1.25 examples/s][2026-04-24 00:51:21,234] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248892] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 48%|████▊ | 151/316 [02:01<02:17, 1.20 examples/s][2026-04-24 00:51:21,753] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248895] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 48%|████▊ | 152/316 [02:02<02:00, 1.36 examples/s][2026-04-24 00:51:23,157] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248897] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 48%|████▊ | 153/316 [02:03<02:32, 1.07 examples/s][2026-04-24 00:51:23,360] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248896] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 49%|████▊ | 154/316 [02:04<01:56, 1.39 examples/s][2026-04-24 00:51:23,764] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248898] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 49%|████▉ | 155/316 [02:04<01:39, 1.61 examples/s][2026-04-24 00:51:24,687] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248899] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 49%|████▉ | 156/316 [02:05<01:53, 1.41 examples/s][2026-04-24 00:51:25,536] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248900] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 50%|████▉ | 157/316 [02:06<02:01, 1.30 examples/s][2026-04-24 00:51:27,328] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248901] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 50%|█████ | 158/316 [02:07<02:47, 1.06s/ examples][2026-04-24 00:51:29,378] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248902] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 50%|█████ | 159/316 [02:10<03:33, 1.36s/ examples][2026-04-24 00:51:31,603] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248905] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 51%|█████ | 160/316 [02:12<04:12, 1.62s/ examples][2026-04-24 00:51:31,633] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248904] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:51:32,692] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248907] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 51%|█████▏ | 162/316 [02:13<02:52, 1.12s/ examples][2026-04-24 00:51:33,482] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248908] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 52%|█████▏ | 163/316 [02:14<02:39, 1.04s/ examples][2026-04-24 00:51:33,933] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248909] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 52%|█████▏ | 164/316 [02:14<02:14, 1.13 examples/s][2026-04-24 00:51:34,151] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248903] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 52%|█████▏ | 165/316 [02:14<01:46, 1.42 examples/s][2026-04-24 00:51:34,851] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248906] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 53%|█████▎ | 166/316 [02:15<01:45, 1.42 examples/s][2026-04-24 00:51:35,330] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248910] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 53%|█████▎ | 167/316 [02:15<01:35, 1.57 examples/s][2026-04-24 00:51:36,953] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248912] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 53%|█████▎ | 168/316 [02:17<02:16, 1.08 examples/s][2026-04-24 00:51:37,280] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248911] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 53%|█████▎ | 169/316 [02:17<01:50, 1.34 examples/s][2026-04-24 00:51:39,238] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248913] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 54%|█████▍ | 170/316 [02:19<02:41, 1.11s/ examples][2026-04-24 00:51:39,825] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248915] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 54%|█████▍ | 171/316 [02:20<02:18, 1.05 examples/s][2026-04-24 00:51:41,765] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248914] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 54%|█████▍ | 172/316 [02:22<02:59, 1.25s/ examples][2026-04-24 00:51:41,904] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248916] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 55%|█████▍ | 173/316 [02:22<02:10, 1.09 examples/s][2026-04-24 00:51:42,767] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248918] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 55%|█████▌ | 174/316 [02:23<02:07, 1.11 examples/s][2026-04-24 00:51:42,865] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248917] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 55%|█████▌ | 175/316 [02:23<01:33, 1.51 examples/s][2026-04-24 00:51:44,643] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248920] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 56%|█████▌ | 176/316 [02:25<02:19, 1.01 examples/s][2026-04-24 00:51:45,750] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248921] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 56%|█████▌ | 177/316 [02:26<02:22, 1.03s/ examples][2026-04-24 00:51:50,444] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248926] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 56%|█████▋ | 178/316 [02:31<04:53, 2.13s/ examples][2026-04-24 00:51:51,355] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248923] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 57%|█████▋ | 179/316 [02:32<04:01, 1.76s/ examples][2026-04-24 00:51:52,430] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248928] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 57%|█████▋ | 180/316 [02:33<03:31, 1.56s/ examples][2026-04-24 00:51:53,145] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248927] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 57%|█████▋ | 181/316 [02:33<02:58, 1.32s/ examples][2026-04-24 00:51:54,466] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248929] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 58%|█████▊ | 182/316 [02:35<02:54, 1.30s/ examples][2026-04-24 00:51:54,527] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248930] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:51:54,529] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248922] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 58%|█████▊ | 184/316 [02:35<01:36, 1.37 examples/s][2026-04-24 00:51:55,410] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248924] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 59%|█████▊ | 185/316 [02:36<01:38, 1.32 examples/s][2026-04-24 00:51:56,831] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248931] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 59%|█████▉ | 186/316 [02:37<02:00, 1.08 examples/s][2026-04-24 00:51:56,963] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248932] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 59%|█████▉ | 187/316 [02:37<01:31, 1.41 examples/s][2026-04-24 00:51:57,406] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248919] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 59%|█████▉ | 188/316 [02:38<01:21, 1.57 examples/s][2026-04-24 00:51:58,357] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248934] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 60%|█████▉ | 189/316 [02:39<01:32, 1.38 examples/s][2026-04-24 00:51:59,557] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248933] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 60%|██████ | 190/316 [02:40<01:48, 1.16 examples/s][2026-04-24 00:52:00,198] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248935] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 60%|██████ | 191/316 [02:40<01:39, 1.25 examples/s][2026-04-24 00:52:01,662] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248937] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 61%|██████ | 192/316 [02:42<02:03, 1.01 examples/s][2026-04-24 00:52:01,815] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248938] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 61%|██████ | 193/316 [02:42<01:31, 1.34 examples/s][2026-04-24 00:52:02,968] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248936] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 61%|██████▏ | 194/316 [02:43<01:45, 1.15 examples/s][2026-04-24 00:52:03,057] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248925] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 62%|██████▏ | 195/316 [02:43<01:17, 1.56 examples/s][2026-04-24 00:52:04,401] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248940] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 62%|██████▏ | 196/316 [02:45<01:41, 1.18 examples/s][2026-04-24 00:52:06,328] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248941] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 62%|██████▏ | 197/316 [02:46<02:19, 1.17s/ examples][2026-04-24 00:52:06,496] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248939] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 63%|██████▎ | 198/316 [02:47<01:42, 1.15 examples/s][2026-04-24 00:52:06,696] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248943] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 63%|██████▎ | 199/316 [02:47<01:20, 1.46 examples/s][2026-04-24 00:52:06,793] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248942] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:52:08,085] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248944] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 64%|██████▎ | 201/316 [02:48<01:17, 1.48 examples/s][2026-04-24 00:52:08,805] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248945] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 64%|██████▍ | 202/316 [02:49<01:18, 1.46 examples/s][2026-04-24 00:52:09,645] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248946] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 64%|██████▍ | 203/316 [02:50<01:22, 1.38 examples/s][2026-04-24 00:52:11,341] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248947] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 65%|██████▍ | 204/316 [02:52<01:51, 1.00 examples/s][2026-04-24 00:52:11,655] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248948] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 65%|██████▍ | 205/316 [02:52<01:28, 1.25 examples/s][2026-04-24 00:52:12,384] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248949] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 65%|██████▌ | 206/316 [02:53<01:25, 1.29 examples/s][2026-04-24 00:52:15,738] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248951] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 66%|██████▌ | 207/316 [02:56<02:46, 1.52s/ examples][2026-04-24 00:52:15,779] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248950] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:52:16,063] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248953] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 66%|██████▌ | 209/316 [02:56<01:37, 1.10 examples/s][2026-04-24 00:52:16,338] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248952] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 66%|██████▋ | 210/316 [02:57<01:19, 1.33 examples/s][2026-04-24 00:52:18,289] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248954] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 67%|██████▋ | 211/316 [02:58<01:51, 1.06s/ examples][2026-04-24 00:52:18,389] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248955] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 67%|██████▋ | 212/316 [02:59<01:23, 1.24 examples/s][2026-04-24 00:52:19,500] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248956] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 67%|██████▋ | 213/316 [03:00<01:31, 1.13 examples/s][2026-04-24 00:52:19,880] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248957] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 68%|██████▊ | 214/316 [03:00<01:15, 1.35 examples/s][2026-04-24 00:52:21,316] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248958] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 68%|██████▊ | 215/316 [03:01<01:35, 1.06 examples/s][2026-04-24 00:52:21,755] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248959] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 68%|██████▊ | 216/316 [03:02<01:19, 1.26 examples/s][2026-04-24 00:52:24,698] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248961] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 69%|██████▊ | 217/316 [03:05<02:21, 1.43s/ examples][2026-04-24 00:52:25,913] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248960] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 69%|██████▉ | 218/316 [03:06<02:13, 1.37s/ examples][2026-04-24 00:52:26,543] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248963] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 69%|██████▉ | 219/316 [03:07<01:51, 1.15s/ examples][2026-04-24 00:52:27,311] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248962] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 70%|██████▉ | 220/316 [03:07<01:39, 1.03s/ examples][2026-04-24 00:52:27,605] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248966] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 70%|██████▉ | 221/316 [03:08<01:16, 1.24 examples/s][2026-04-24 00:52:28,747] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248965] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 70%|███████ | 222/316 [03:09<01:25, 1.10 examples/s][2026-04-24 00:52:30,108] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248967] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 71%|███████ | 223/316 [03:10<01:37, 1.05s/ examples][2026-04-24 00:52:31,056] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248968] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 71%|███████ | 224/316 [03:11<01:34, 1.03s/ examples][2026-04-24 00:52:31,227] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248969] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 71%|███████ | 225/316 [03:11<01:09, 1.31 examples/s][2026-04-24 00:52:32,930] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248970] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 72%|███████▏ | 226/316 [03:13<01:34, 1.05s/ examples][2026-04-24 00:52:33,558] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248971] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 72%|███████▏ | 227/316 [03:14<01:21, 1.09 examples/s][2026-04-24 00:52:34,520] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248972] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 72%|███████▏ | 228/316 [03:15<01:21, 1.08 examples/s][2026-04-24 00:52:35,687] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248973] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 72%|███████▏ | 229/316 [03:16<01:26, 1.00 examples/s][2026-04-24 00:52:35,774] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248974] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 73%|███████▎ | 230/316 [03:16<01:03, 1.36 examples/s][2026-04-24 00:52:35,913] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248964] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 73%|███████▎ | 231/316 [03:16<00:46, 1.82 examples/s][2026-04-24 00:52:37,064] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248975] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 73%|███████▎ | 232/316 [03:17<01:01, 1.38 examples/s][2026-04-24 00:52:37,625] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248976] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 74%|███████▎ | 233/316 [03:18<00:56, 1.48 examples/s][2026-04-24 00:52:38,544] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248977] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 74%|███████▍ | 234/316 [03:19<01:01, 1.33 examples/s][2026-04-24 00:52:41,729] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248980] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 74%|███████▍ | 235/316 [03:22<02:00, 1.48s/ examples][2026-04-24 00:52:41,846] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248979] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 75%|███████▍ | 236/316 [03:22<01:25, 1.07s/ examples][2026-04-24 00:52:42,818] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248982] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 75%|███████▌ | 237/316 [03:23<01:22, 1.04s/ examples][2026-04-24 00:52:44,499] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248978] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 75%|███████▌ | 238/316 [03:25<01:36, 1.24s/ examples][2026-04-24 00:52:44,541] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248983] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:52:44,786] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248981] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 76%|███████▌ | 240/316 [03:25<00:55, 1.37 examples/s][2026-04-24 00:52:45,455] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248984] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 76%|███████▋ | 241/316 [03:26<00:53, 1.40 examples/s][2026-04-24 00:52:47,321] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248985] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 77%|███████▋ | 242/316 [03:27<01:15, 1.02s/ examples][2026-04-24 00:52:48,431] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248986] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 77%|███████▋ | 243/316 [03:29<01:15, 1.04s/ examples][2026-04-24 00:52:49,636] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248988] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 77%|███████▋ | 244/316 [03:30<01:18, 1.09s/ examples][2026-04-24 00:52:50,799] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248990] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 78%|███████▊ | 245/316 [03:31<01:18, 1.11s/ examples][2026-04-24 00:52:51,176] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248989] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 78%|███████▊ | 246/316 [03:31<01:02, 1.12 examples/s][2026-04-24 00:52:53,585] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248992] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 78%|███████▊ | 247/316 [03:34<01:32, 1.34s/ examples][2026-04-24 00:52:54,356] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248993] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 78%|███████▊ | 248/316 [03:35<01:19, 1.17s/ examples][2026-04-24 00:52:54,954] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248994] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 79%|███████▉ | 249/316 [03:35<01:07, 1.00s/ examples][2026-04-24 00:52:56,949] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248997] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 79%|███████▉ | 250/316 [03:37<01:25, 1.30s/ examples][2026-04-24 00:52:57,504] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248996] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:52:57,537] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248987] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 79%|███████▉ | 251/316 [03:38<01:11, 1.10s/ examples][2026-04-24 00:52:58,323] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248991] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 80%|████████ | 253/316 [03:38<00:48, 1.31 examples/s][2026-04-24 00:52:58,815] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248998] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 80%|████████ | 254/316 [03:39<00:43, 1.44 examples/s][2026-04-24 00:52:59,681] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248999] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 81%|████████ | 255/316 [03:40<00:45, 1.35 examples/s][2026-04-24 00:53:00,445] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249000] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 81%|████████ | 256/316 [03:41<00:44, 1.34 examples/s][2026-04-24 00:53:02,716] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:248995] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 81%|████████▏ | 257/316 [03:43<01:09, 1.17s/ examples][2026-04-24 00:53:03,895] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249001] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 82%|████████▏ | 258/316 [03:44<01:08, 1.17s/ examples][2026-04-24 00:53:04,057] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249003] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 82%|████████▏ | 259/316 [03:44<00:50, 1.13 examples/s][2026-04-24 00:53:04,167] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249002] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 82%|████████▏ | 260/316 [03:44<00:37, 1.48 examples/s][2026-04-24 00:53:06,411] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249005] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 83%|████████▎ | 261/316 [03:47<01:01, 1.12s/ examples][2026-04-24 00:53:06,599] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249006] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 83%|████████▎ | 262/316 [03:47<00:45, 1.19 examples/s][2026-04-24 00:53:07,788] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249007] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 83%|████████▎ | 263/316 [03:48<00:50, 1.06 examples/s][2026-04-24 00:53:07,953] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249004] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 84%|████████▎ | 264/316 [03:48<00:38, 1.37 examples/s][2026-04-24 00:53:08,592] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249008] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 84%|████████▍ | 265/316 [03:49<00:35, 1.46 examples/s][2026-04-24 00:53:10,761] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249010] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 84%|████████▍ | 266/316 [03:51<00:56, 1.13s/ examples][2026-04-24 00:53:12,187] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249012] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 84%|████████▍ | 267/316 [03:52<00:59, 1.22s/ examples][2026-04-24 00:53:14,031] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249011] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 85%|████████▍ | 268/316 [03:54<01:07, 1.41s/ examples][2026-04-24 00:53:14,423] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249009] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 85%|████████▌ | 269/316 [03:55<00:51, 1.10s/ examples][2026-04-24 00:53:14,781] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249015] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 85%|████████▌ | 270/316 [03:55<00:40, 1.14 examples/s][2026-04-24 00:53:14,895] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249013] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 86%|████████▌ | 271/316 [03:55<00:29, 1.54 examples/s][2026-04-24 00:53:15,353] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249014] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 86%|████████▌ | 272/316 [03:56<00:26, 1.69 examples/s][2026-04-24 00:53:16,986] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249017] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 86%|████████▋ | 273/316 [03:57<00:38, 1.11 examples/s][2026-04-24 00:53:17,378] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249016] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 87%|████████▋ | 274/316 [03:58<00:31, 1.33 examples/s][2026-04-24 00:53:17,955] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249018] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 87%|████████▋ | 275/316 [03:58<00:28, 1.43 examples/s][2026-04-24 00:53:21,134] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249020] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:53:21,303] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249021] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 87%|████████▋ | 276/316 [04:01<00:59, 1.50s/ examples][2026-04-24 00:53:21,692] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249019] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 88%|████████▊ | 278/316 [04:02<00:33, 1.12 examples/s][2026-04-24 00:53:22,874] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249023] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 88%|████████▊ | 279/316 [04:03<00:35, 1.04 examples/s][2026-04-24 00:53:23,146] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249022] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 89%|████████▊ | 280/316 [04:03<00:28, 1.27 examples/s][2026-04-24 00:53:24,676] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249024] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 89%|████████▉ | 281/316 [04:05<00:34, 1.01 examples/s][2026-04-24 00:53:25,113] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249025] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 89%|████████▉ | 282/316 [04:05<00:28, 1.20 examples/s][2026-04-24 00:53:25,693] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249026] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 90%|████████▉ | 283/316 [04:06<00:25, 1.32 examples/s][2026-04-24 00:53:27,224] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249028] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 90%|████████▉ | 284/316 [04:07<00:31, 1.02 examples/s][2026-04-24 00:53:30,479] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249030] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 90%|█████████ | 285/316 [04:11<00:51, 1.65s/ examples][2026-04-24 00:53:31,195] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249032] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 91%|█████████ | 286/316 [04:11<00:41, 1.37s/ examples][2026-04-24 00:53:32,003] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249029] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 91%|█████████ | 287/316 [04:12<00:35, 1.21s/ examples][2026-04-24 00:53:32,284] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249031] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 91%|█████████ | 288/316 [04:12<00:26, 1.08 examples/s][2026-04-24 00:53:32,873] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249034] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 91%|█████████▏| 289/316 [04:13<00:22, 1.21 examples/s][2026-04-24 00:53:34,407] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249035] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 92%|█████████▏| 290/316 [04:15<00:27, 1.04s/ examples][2026-04-24 00:53:35,224] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249036] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 92%|█████████▏| 291/316 [04:15<00:24, 1.03 examples/s][2026-04-24 00:53:36,583] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249037] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 92%|█████████▏| 292/316 [04:17<00:26, 1.09s/ examples][2026-04-24 00:53:37,391] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249027] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 93%|█████████▎| 293/316 [04:18<00:23, 1.01s/ examples][2026-04-24 00:53:37,472] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249038] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:53:38,568] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249039] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 93%|█████████▎| 295/316 [04:19<00:17, 1.23 examples/s][2026-04-24 00:53:39,682] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249041] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 94%|█████████▎| 296/316 [04:20<00:17, 1.12 examples/s][2026-04-24 00:53:40,635] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249040] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 94%|█████████▍| 297/316 [04:21<00:17, 1.10 examples/s][2026-04-24 00:53:42,096] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249043] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 94%|█████████▍| 298/316 [04:22<00:18, 1.05s/ examples][2026-04-24 00:53:42,660] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249044] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 95%|█████████▍| 299/316 [04:23<00:15, 1.09 examples/s][2026-04-24 00:53:42,952] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249042] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 95%|█████████▍| 300/316 [04:23<00:11, 1.36 examples/s][2026-04-24 00:53:44,248] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249033] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 95%|█████████▌| 301/316 [04:24<00:13, 1.11 examples/s][2026-04-24 00:53:46,167] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249047] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 96%|█████████▌| 302/316 [04:26<00:16, 1.20s/ examples][2026-04-24 00:53:46,999] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249048] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 96%|█████████▌| 303/316 [04:27<00:14, 1.09s/ examples][2026-04-24 00:53:47,089] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249049] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 96%|█████████▌| 304/316 [04:27<00:09, 1.26 examples/s][2026-04-24 00:53:47,180] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249045] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:53:48,981] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249046] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 97%|█████████▋| 306/316 [04:29<00:08, 1.16 examples/s][2026-04-24 00:53:49,326] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249050] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 97%|█████████▋| 307/316 [04:29<00:06, 1.36 examples/s][2026-04-24 00:53:49,707] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249051] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 97%|█████████▋| 308/316 [04:30<00:05, 1.56 examples/s][2026-04-24 00:53:50,363] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249052] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 98%|█████████▊| 309/316 [04:31<00:04, 1.54 examples/s][2026-04-24 00:53:52,130] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249053] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 98%|█████████▊| 310/316 [04:32<00:05, 1.04 examples/s][2026-04-24 00:53:53,699] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249055] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 98%|█████████▊| 311/316 [04:34<00:05, 1.13s/ examples][2026-04-24 00:53:53,788] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249056] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
[2026-04-24 00:53:57,227] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249054] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 99%|█████████▉| 313/316 [04:37<00:04, 1.42s/ examples][2026-04-24 00:53:57,705] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249057] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 99%|█████████▉| 314/316 [04:38<00:02, 1.19s/ examples][2026-04-24 00:53:58,163] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249058] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 100%|█████████▉| 315/316 [04:38<00:00, 1.00 examples/s][2026-04-24 00:54:01,710] [WARNING] [axolotl.prompt_strategies.chat_template._tokenize_single_prompt:495] [PID:249059] Last turn is not trainable, skipping having to find the turn indices. This may cause incorrect last EOT/EOS token to be unmasked.This is likely a dataset design issue. Please ensure last turn is trainable.
|
||
|
||
Tokenizing Prompts (num_proc=288): 100%|██████████| 316/316 [04:42<00:00, 1.68s/ examples]
|
||
Tokenizing Prompts (num_proc=288): 100%|██████████| 316/316 [04:42<00:00, 1.12 examples/s]
|
||
[2026-04-24 00:54:02,155] [INFO] [axolotl.utils.data.utils._log_dataset_stats:212] [PID:248539] min_input_len: 1292
|
||
[2026-04-24 00:54:02,155] [INFO] [axolotl.utils.data.utils._log_dataset_stats:213] [PID:248539] max_input_len: 87006
|
||
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 0%| | 0/316 [00:00<?, ? examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 1%| | 2/316 [00:02<06:49, 1.30s/ examples]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 3%|▎ | 10/316 [00:02<01:03, 4.81 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 5%|▌ | 16/316 [00:02<00:35, 8.46 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 7%|▋ | 22/316 [00:02<00:22, 12.80 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 9%|▉ | 28/316 [00:03<00:16, 17.23 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 11%|█ | 34/316 [00:03<00:12, 21.97 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 13%|█▎ | 42/316 [00:03<00:09, 29.24 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 15%|█▌ | 48/316 [00:03<00:08, 33.09 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 17%|█▋ | 54/316 [00:03<00:07, 36.43 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 19%|█▊ | 59/316 [00:03<00:07, 36.45 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 20%|██ | 64/316 [00:03<00:07, 32.37 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 22%|██▏ | 68/316 [00:04<00:07, 32.18 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 23%|██▎ | 72/316 [00:04<00:07, 32.62 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 24%|██▍ | 76/316 [00:04<00:07, 31.35 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 25%|██▌ | 80/316 [00:04<00:07, 30.40 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 27%|██▋ | 84/316 [00:04<00:07, 29.49 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 28%|██▊ | 88/316 [00:04<00:07, 29.72 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 29%|██▉ | 92/316 [00:04<00:07, 28.96 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 30%|███ | 95/316 [00:05<00:07, 28.67 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 31%|███ | 98/316 [00:05<00:07, 28.11 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 32%|███▏ | 102/316 [00:05<00:07, 27.81 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 78%|███████▊ | 248/316 [00:05<00:00, 377.30 examples/s]
|
||
Dropping Invalid Sequences (<None or >32768) (num_proc=288): 100%|██████████| 316/316 [00:05<00:00, 55.89 examples/s]
|
||
[2026-04-24 00:54:09,851] [INFO] [axolotl.utils.data.utils._drop_outside_range:306] [PID:248539] Dropped 135 sequences outside valid range ([None, 32768])
|
||
|
||
Saving the dataset (0/1 shards): 0%| | 0/181 [00:00<?, ? examples/s]
|
||
Saving the dataset (0/1 shards): 100%|██████████| 181/181 [00:00<00:00, 864.82 examples/s]
|
||
Saving the dataset (1/1 shards): 100%|██████████| 181/181 [00:00<00:00, 864.82 examples/s]
|
||
Saving the dataset (1/1 shards): 100%|██████████| 181/181 [00:00<00:00, 652.92 examples/s]
|
||
[2026-04-24 00:54:10,254] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:420] [PID:248539] total_num_tokens: 4_184_767
|
||
[2026-04-24 00:54:10,293] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:438] [PID:248539] `total_supervised_tokens: 3_566_696`
|
||
[2026-04-24 00:54:10,294] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:521] [PID:248539] total_num_steps: 17
|
||
[2026-04-24 00:54:10,294] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:248539] Maximum number of steps set at 17
|
||
[2026-04-24 00:54:10,340] [DEBUG] [axolotl.train.setup_model_and_tokenizer:70] [PID:248539] loading tokenizer... Qwen/Qwen3-8B
|
||
[2026-04-24 00:54:10,812] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:307] [PID:248539] EOS: 151645 / <|im_end|>
|
||
[2026-04-24 00:54:10,812] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:308] [PID:248539] BOS: None / None
|
||
[2026-04-24 00:54:10,812] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:309] [PID:248539] PAD: 151643 / <|endoftext|>
|
||
[2026-04-24 00:54:10,812] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:310] [PID:248539] UNK: None / None
|
||
[2026-04-24 00:54:10,812] [DEBUG] [axolotl.train.setup_model_and_tokenizer:81] [PID:248539] Loading model
|
||
[2026-04-24 00:54:10,819] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:94] [PID:248539] Patched Trainer.evaluation_loop with nanmean loss calculation
|
||
[2026-04-24 00:54:10,820] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:148] [PID:248539] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
|
||
[2026-04-24 00:54:11,321] [INFO] [axolotl.monkeypatch.attention.flash_attn_4.patch_flash_attn_4:52] [PID:248539] Flash Attention 4 is available for your GPU and offers faster training speeds. To enable: pip install flash-attn-4
|
||
[2026-04-24 00:54:11,322] [INFO] [axolotl.monkeypatch.deepspeed_utils.patch_checkpoint_wrapper_setattr:52] [PID:248539] CheckpointWrapper patched to forward DeepSpeed attributes
|
||
[2026-04-24 00:54:46,885] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:361] [PID:248539] Converting modules to torch.bfloat16
|
||
[2026-04-24 00:54:46,889] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:248539] Memory usage after model load 4.973GB (+4.973GB allocated, +5.080GB reserved)
|
||
[2026-04-24 00:54:55,336] [INFO] [axolotl.train.save_initial_configs:421] [PID:248539] Pre-saving tokenizer to /e/data1/datasets/playground/ot-baf/checkpoints/sera-v4-316-axolotl__Qwen3-8B...
|
||
[2026-04-24 00:54:55,431] [INFO] [axolotl.train.save_initial_configs:426] [PID:248539] Pre-saving model config to /e/data1/datasets/playground/ot-baf/checkpoints/sera-v4-316-axolotl__Qwen3-8B...
|
||
[2026-04-24 00:54:55,434] [INFO] [axolotl.train.execute_training:222] [PID:248539] Starting trainer...
|
||
[2026-04-24 00:54:55,910] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,910] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,910] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,911] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,911] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,911] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,911] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,911] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,911] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,913] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,913] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,913] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,914] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,914] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,914] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,914] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,914] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,916] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,916] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,916] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,916] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,916] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,916] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_external_parameters to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,920] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,922] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,923] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,925] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,927] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,928] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,930] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,932] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,933] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,934] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,935] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,937] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,938] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,939] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,941] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,942] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,943] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,945] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,946] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,947] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,948] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,949] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,951] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,952] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,953] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,954] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,956] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,957] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,958] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,959] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,962] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,963] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,965] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:55,966] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_id to wrapped module Qwen3DecoderLayer
|
||
|
||
0%| | 0/17 [00:00<?, ?it/s][2026-04-24 00:54:58,156] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:58,700] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:58,756] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:58,824] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:58,891] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:58,954] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,020] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,086] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,153] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,219] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,287] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,351] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,417] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,484] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,551] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,619] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,687] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,751] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,821] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,884] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:54:59,955] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,018] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,087] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,153] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,222] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,286] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,419] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,490] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,554] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,624] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,689] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,760] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,824] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,895] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:00,959] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:12,925] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:12,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,010] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,062] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,113] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,165] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,219] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,272] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,323] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,376] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,428] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,481] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,535] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,588] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,643] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,698] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,752] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,807] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,862] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,918] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:13,973] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,028] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,083] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,140] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,195] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,249] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,304] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,360] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,415] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,471] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,526] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,582] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,639] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,700] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,750] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:14,802] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:25,853] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:25,871] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:25,904] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:25,941] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:25,978] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,015] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,054] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,093] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,130] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,168] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,205] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,243] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,280] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,315] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,354] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,393] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,440] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,488] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,537] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,596] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,646] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,691] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,736] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,785] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,834] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,884] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,933] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:26,982] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:27,031] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:27,090] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:27,140] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:27,187] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:27,232] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:27,279] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:27,327] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:27,377] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,091] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,108] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,160] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,217] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,275] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,332] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,388] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,444] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,499] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,556] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,612] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,730] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,789] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,848] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,908] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:34,967] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,026] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,086] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,145] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,205] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,266] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,325] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,385] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,445] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,505] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,565] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,625] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,745] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,806] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,865] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,925] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:35,985] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:36,044] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:36,105] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:47,551] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:47,568] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:47,632] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:47,699] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:47,766] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:47,832] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:47,898] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:47,966] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,033] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,099] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,168] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,235] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,302] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,369] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,436] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,503] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,570] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,637] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,771] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,838] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,906] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:48,972] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,039] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,106] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,173] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,240] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,307] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,375] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,442] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,510] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,577] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,645] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,712] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,781] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:49,849] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:58,459] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:58,477] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:58,541] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:58,607] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:58,673] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:58,740] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:58,807] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:58,873] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:58,940] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,006] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,073] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,141] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,210] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,276] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,342] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,408] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,474] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,541] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,609] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,676] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,743] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,810] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,876] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:55:59,943] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,010] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,076] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,143] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,209] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,276] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,343] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,410] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,476] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,543] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,611] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,679] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:00,746] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,276] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,305] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,324] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,354] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,385] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,414] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,446] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,475] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,531] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,590] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,655] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,738] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,801] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,867] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:09,992] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,053] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,116] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,181] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,247] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,311] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,377] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,442] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,508] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,573] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,638] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,703] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,833] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,898] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:10,964] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:11,028] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:11,094] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:11,159] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:11,222] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:11,289] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,079] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,096] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,145] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,195] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,246] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,297] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,349] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,403] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,455] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,508] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,559] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,611] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,664] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,717] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,772] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,825] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,877] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,930] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:23,982] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,034] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,085] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,137] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,188] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,240] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,291] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,343] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,395] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,449] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,501] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,554] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,658] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,710] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,766] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,821] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:24,877] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
|
||
6%|▌ | 1/17 [01:35<25:35, 95.94s/it]
|
||
|
||
{'loss': '0.5113', 'grad_norm': '12.72', 'learning_rate': '0', 'ppl': '1.668', 'memory/max_active (GiB)': '78.28', 'memory/max_allocated (GiB)': '76.8', 'memory/device_reserved (GiB)': '90.72', 'tokens/train_per_sec_per_gpu': '95.38', 'tokens/total': 779008, 'tokens/trainable': 269929, 'epoch': '0.1739'}
|
||
|
||
6%|▌ | 1/17 [01:35<25:35, 95.94s/it][2026-04-24 00:56:33,045] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,074] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,121] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,170] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,220] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,270] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,319] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,367] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,418] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,468] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,517] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,567] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,617] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,667] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,717] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,768] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,818] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,867] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,917] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:33,968] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,018] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,068] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,118] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,168] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,218] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,268] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,319] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,368] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,419] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,468] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,519] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,569] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,620] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,670] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,720] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:34,771] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,592] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,649] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,677] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,731] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,757] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,805] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,845] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,907] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:41,971] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,035] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,100] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,163] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,227] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,290] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,354] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,417] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,482] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,544] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,607] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,734] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,797] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,861] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,924] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:42,988] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:43,051] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:43,115] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:43,179] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:43,244] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:43,305] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:43,369] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:43,432] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:43,495] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:43,556] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:52,550] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:52,567] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:52,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:52,687] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:52,749] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:52,810] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:52,871] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:52,933] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:52,995] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,057] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,121] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,183] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,246] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,308] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,371] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,433] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,495] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,558] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,621] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,684] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,746] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,809] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,871] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,933] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:53,995] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,057] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,121] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,184] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,246] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,309] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,373] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,435] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,497] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,561] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,624] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:56:54,687] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,245] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,297] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,350] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,406] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,461] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,514] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,568] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,621] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,674] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,728] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,781] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,836] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,892] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:03,947] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,003] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,059] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,115] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,171] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,227] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,282] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,396] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,453] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,510] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,568] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,683] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,741] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,799] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,858] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,919] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:04,976] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:05,035] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:05,092] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:05,150] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:05,208] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:13,802] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:13,821] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:13,859] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:13,899] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:13,937] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:13,975] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,014] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,053] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,093] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,132] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,170] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,210] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,250] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,288] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,327] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,367] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,407] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,447] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,488] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,527] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,567] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,648] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,688] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,728] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,768] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,808] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,848] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,888] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,929] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:14,971] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:15,011] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:15,052] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:15,094] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:15,135] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:15,176] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,069] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,086] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,123] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,162] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,199] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,240] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,281] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,319] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,358] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,399] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,438] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,477] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,522] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,573] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,682] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,736] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,790] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,846] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,903] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:21,958] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,014] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,069] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,124] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,179] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,234] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,289] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,346] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,402] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,457] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,512] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,567] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,622] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,678] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,733] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:22,788] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:31,524] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:31,542] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:31,592] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:31,649] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:31,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:31,759] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:31,814] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:31,870] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:31,927] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:31,985] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,042] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,101] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,160] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,219] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,276] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,386] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,442] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,499] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,557] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,616] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,677] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,735] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,792] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,852] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,911] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:32,970] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:33,029] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:33,083] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:33,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:33,194] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:33,251] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:33,309] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:33,367] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:33,426] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:33,485] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:42,876] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:42,893] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:42,937] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:42,986] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,033] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,080] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,131] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,181] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,232] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,280] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,379] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,431] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,481] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,533] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,584] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,632] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,681] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,732] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,839] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,900] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:43,959] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,019] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,079] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,140] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,200] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,260] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,320] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,380] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,441] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,502] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,562] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,623] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,684] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:44,745] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,024] [WARNING] [stage3.py:2497:step] 5 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
12%|█▏ | 2/17 [02:56<21:47, 87.16s/it]
|
||
|
||
{'loss': '0.5279', 'grad_norm': '13.61', 'learning_rate': '3.333e-06', 'ppl': '1.695', 'memory/max_active (GiB)': '89.23', 'memory/max_allocated (GiB)': '89.23', 'memory/device_reserved (GiB)': '91.99', 'tokens/train_per_sec_per_gpu': '87.14', 'tokens/total': 1528704, 'tokens/trainable': 521951, 'epoch': '0.3478'}
|
||
|
||
12%|█▏ | 2/17 [02:56<21:47, 87.16s/it][2026-04-24 00:57:54,049] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,081] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,134] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,188] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,242] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,298] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,352] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,407] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,463] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,519] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:54,946] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:55,001] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:55,056] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:55,111] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:55,523] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:55,944] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:55,998] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,052] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,108] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,163] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,219] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,273] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,724] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,755] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,808] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,865] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,920] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:56,973] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:57,028] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:57,084] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:57,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:57,193] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:57,248] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:57,304] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:57,361] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:57:57,417] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,093] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,114] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,167] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,224] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,281] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,337] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,394] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,451] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,508] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,565] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,622] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,680] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,737] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,793] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,848] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,905] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:05,962] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,019] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,076] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,133] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,189] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,247] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,303] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,359] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,416] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,473] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,531] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,588] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,645] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,701] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,759] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,816] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,874] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,931] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:06,989] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:07,045] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:14,718] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:14,736] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:14,756] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:14,778] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:14,799] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:14,821] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:14,843] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:14,916] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:14,972] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,026] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,094] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,153] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,217] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,295] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,352] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,417] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,480] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,542] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,625] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,683] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,737] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,807] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,871] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,935] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:15,999] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,063] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,129] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,194] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,258] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,323] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,390] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,454] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,519] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,585] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,650] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:16,715] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,497] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,515] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,564] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,614] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,666] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,718] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,824] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,877] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,932] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:26,983] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,034] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,086] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,193] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,248] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,303] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,355] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,407] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,461] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,515] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,572] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,627] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,747] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,822] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,889] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:27,946] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:28,005] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:28,064] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:28,127] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:28,189] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:28,252] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:28,315] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:28,378] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:28,441] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,428] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,446] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,498] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,552] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,607] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,662] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,718] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,773] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,828] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,884] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,940] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:38,999] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,059] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,117] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,177] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,236] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,295] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,354] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,413] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,473] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,532] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,590] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,648] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,707] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,767] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,825] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,882] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,937] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:39,992] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:40,048] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:40,106] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:40,163] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:40,220] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:40,280] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:40,337] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:40,396] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,439] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,457] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,501] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,546] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,592] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,640] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,689] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,736] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,782] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,831] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,879] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,925] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:49,972] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,022] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,080] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,140] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,201] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,262] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,324] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,387] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,451] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,515] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,577] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,639] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,702] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,764] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,826] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,890] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:50,954] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:51,016] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:51,080] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:51,142] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:51,206] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:51,272] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:51,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:58:51,394] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,075] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,096] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,142] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,191] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,240] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,289] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,389] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,438] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,487] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,536] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,586] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,636] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,735] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,784] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,833] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,883] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,932] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:01,981] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,030] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,080] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,129] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,179] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,228] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,277] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,326] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,375] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,425] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,475] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,525] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,574] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,623] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,673] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,724] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:02,773] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,592] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,611] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,641] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,673] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,706] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,741] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,773] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,807] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,842] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,874] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,924] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:09,975] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,025] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,079] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,143] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,195] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,245] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,302] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,410] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,465] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,522] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,576] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,631] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,686] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,742] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,797] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,852] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,906] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:10,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:11,017] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:11,071] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:11,126] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:11,182] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:11,237] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:11,292] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:19,562] [WARNING] [stage3.py:2497:step] 6 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
18%|█▊ | 3/17 [04:22<20:09, 86.42s/it]
|
||
|
||
{'loss': '0.5045', 'grad_norm': '10.16', 'learning_rate': '6.667e-06', 'ppl': '1.656', 'memory/max_active (GiB)': '85.22', 'memory/max_allocated (GiB)': '85.22', 'memory/device_reserved (GiB)': '92.49', 'tokens/train_per_sec_per_gpu': '96.86', 'tokens/total': 2261376, 'tokens/trainable': 792517, 'epoch': '0.5217'}
|
||
|
||
18%|█▊ | 3/17 [04:22<20:09, 86.42s/it][2026-04-24 00:59:19,588] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:19,623] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:19,683] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:19,745] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:19,805] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:19,866] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:19,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:19,988] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:20,049] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:20,110] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:20,172] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:20,234] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:20,295] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:20,357] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:20,419] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:20,481] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:20,543] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:20,604] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,043] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,098] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,158] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,218] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,278] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,399] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,459] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,520] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,583] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,646] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,707] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,832] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,894] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:21,956] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:22,019] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:22,081] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,269] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,290] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,336] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,386] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,437] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,488] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,540] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,591] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,643] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,693] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,746] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,797] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,849] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:32,901] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,324] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,370] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,421] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,471] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,522] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,572] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,621] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,724] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,773] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,825] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,876] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,928] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:33,979] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:34,031] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:34,083] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:34,135] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:34,187] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:34,240] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:34,334] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:34,348] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:34,404] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,117] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,137] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,183] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,232] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,281] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,330] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,379] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,428] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,478] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,527] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,577] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,677] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,725] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,775] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,824] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,874] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,924] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:42,973] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,023] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,072] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,124] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,172] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,221] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,270] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,320] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,369] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,420] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,469] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,519] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,570] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,619] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,668] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,719] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:43,820] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,311] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,381] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,433] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,485] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,540] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,595] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,650] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,758] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,813] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,868] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,923] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:50,976] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,030] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,085] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,191] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,245] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,299] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,351] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,404] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,456] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,510] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,565] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,619] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,674] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,728] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,782] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,836] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,891] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,944] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:51,997] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:52,051] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:52,105] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:52,157] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:59,570] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:59,590] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:59,636] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:59,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:59,734] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:59,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:59,832] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:59,881] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:59,931] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 00:59:59,981] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,031] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,081] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,131] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,180] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,230] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,280] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,330] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,380] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,430] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,480] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,530] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,581] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,630] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,680] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,730] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,780] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,830] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,880] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,930] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:00,980] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:01,032] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:01,082] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:01,133] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:01,184] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:01,235] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:01,284] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,286] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,306] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,347] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,392] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,436] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,480] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,523] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,567] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,612] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,656] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,700] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,743] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,788] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,832] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,876] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,921] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:08,964] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,008] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,052] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,097] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,141] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,185] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,229] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,273] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,318] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,362] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,405] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,449] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,494] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,538] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,583] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,716] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,761] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:09,804] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:15,855] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:15,875] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:15,924] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:15,976] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,079] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,131] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,183] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,234] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,287] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,390] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,443] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,494] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,546] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,598] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,649] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,702] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,753] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,806] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,857] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,910] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:16,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,013] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,065] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,117] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,169] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,221] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,273] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,325] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,377] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,429] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,481] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,532] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,585] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:17,637] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:24,795] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:24,815] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:24,846] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:24,879] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:24,911] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:24,945] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:24,978] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,012] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,046] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,078] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,121] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,169] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,223] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,292] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,343] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,394] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,442] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,495] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,549] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,603] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,656] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,710] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,775] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,830] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,878] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,928] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:25,981] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:26,036] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:26,090] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:26,144] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:26,199] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:26,251] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:26,305] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:26,359] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:26,413] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:26,467] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:34,739] [WARNING] [stage3.py:2497:step] 6 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
24%|██▎ | 4/17 [05:37<17:45, 81.98s/it]
|
||
|
||
{'loss': '0.401', 'grad_norm': '2.152', 'learning_rate': '1e-05', 'ppl': '1.493', 'memory/max_active (GiB)': '85.11', 'memory/max_allocated (GiB)': '85.11', 'memory/device_reserved (GiB)': '92.12', 'tokens/train_per_sec_per_gpu': '125', 'tokens/total': 2988992, 'tokens/trainable': 1053901, 'epoch': '0.6957'}
|
||
|
||
24%|██▎ | 4/17 [05:37<17:45, 81.98s/it][2026-04-24 01:00:34,764] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:34,791] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:34,834] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:34,880] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:34,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:34,971] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,017] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,063] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,109] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,154] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,199] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,245] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,291] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,337] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,383] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,430] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,475] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,522] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,568] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,613] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,659] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,706] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,752] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,798] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,844] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,890] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,936] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:35,982] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:36,029] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:36,074] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:36,121] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:36,167] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:36,213] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:36,259] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:36,306] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:36,352] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:42,864] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:42,884] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:42,923] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:42,964] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,005] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,049] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,093] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,178] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,222] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,262] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,303] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,346] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,391] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,454] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,508] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,566] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,625] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,744] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,803] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,861] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,920] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:43,979] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,038] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,096] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,154] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,212] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,271] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,330] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,759] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,820] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,874] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,930] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:44,988] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:45,047] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:53,652] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:53,672] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:53,725] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:53,779] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:53,835] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:53,890] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:53,946] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,002] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,058] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,114] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,171] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,229] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,285] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,342] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,398] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,455] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,513] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,571] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,629] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,684] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,742] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,799] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,860] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,920] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:54,979] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,038] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,097] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,156] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,214] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,274] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,388] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,446] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,505] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,563] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:00:55,621] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,406] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,426] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,465] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,506] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,547] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,590] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,631] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,673] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,716] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,757] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,797] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,839] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,908] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:04,964] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,090] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,153] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,215] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,277] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,340] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,400] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,463] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,525] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,589] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,652] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,714] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,776] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,839] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,902] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:05,965] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:06,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:06,090] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:06,153] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:06,215] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:06,277] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:06,340] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:15,581] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:15,601] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:15,650] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:15,703] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:15,755] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:15,806] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:15,859] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:15,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:15,965] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,017] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,071] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,124] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,178] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,232] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,287] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,342] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,396] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,451] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,506] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,562] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,617] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,673] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,728] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,784] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,840] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,895] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:16,950] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:17,007] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:17,063] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:17,118] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:17,174] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:17,230] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:17,286] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:17,342] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:17,396] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:17,451] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,300] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,320] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,361] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,403] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,446] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,490] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,534] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,578] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,621] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,664] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,708] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,752] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,797] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,842] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,887] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,933] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:25,978] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,023] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,067] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,112] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,157] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,201] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,246] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,291] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,336] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,380] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,425] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,470] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,516] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,561] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,605] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,650] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,695] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,740] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,785] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:26,828] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,101] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,121] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,179] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,234] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,291] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,349] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,405] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,463] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,519] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,577] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,634] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,692] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,751] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,809] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,868] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:33,983] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,042] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,100] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,158] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,217] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,276] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,334] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,393] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,452] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,511] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,570] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,629] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,687] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,746] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,805] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,863] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,922] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:34,982] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:35,042] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:35,098] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:43,791] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:43,810] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:43,864] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:43,921] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:43,977] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,034] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,092] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,151] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,209] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,268] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,326] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,383] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,442] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,499] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,556] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,613] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,728] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,785] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,842] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,900] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:44,956] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,013] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,071] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,128] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,185] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,242] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,300] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,413] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,470] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,528] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,585] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,643] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,701] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:45,756] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:54,831] [WARNING] [stage3.py:2497:step] 7 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
29%|██▉ | 5/17 [06:57<16:15, 81.30s/it]
|
||
|
||
{'loss': '0.3597', 'grad_norm': '1.655', 'learning_rate': '9.875e-06', 'ppl': '1.433', 'memory/max_active (GiB)': '85.57', 'memory/max_allocated (GiB)': '85.57', 'memory/device_reserved (GiB)': '92.88', 'tokens/train_per_sec_per_gpu': '117.7', 'tokens/total': 3735296, 'tokens/trainable': 1324930, 'epoch': '0.8696'}
|
||
|
||
29%|██▉ | 5/17 [06:57<16:15, 81.30s/it][2026-04-24 01:01:54,874] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:54,908] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:54,967] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,026] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,087] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,147] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,206] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,266] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,327] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,388] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,448] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,510] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,571] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,632] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,693] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,753] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,813] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,874] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,934] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:55,994] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,055] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,115] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,177] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,237] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,297] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,359] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,420] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,481] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,542] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,603] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,664] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,726] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,787] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,848] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,909] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:01:56,970] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,241] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,263] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,292] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,323] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,354] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,386] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,418] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,450] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,483] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,517] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,547] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,581] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,628] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,676] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,724] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:06,773] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,051] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,083] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,114] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,146] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,178] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,209] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,241] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,274] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,306] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,381] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,429] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,477] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,527] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,599] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,643] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,797] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,845] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,887] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:07,972] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:14,885] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:14,904] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:14,918] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:14,931] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:14,944] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:14,958] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,009] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,053] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,111] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,175] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,230] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,302] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,722] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,778] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,837] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,895] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:15,954] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,013] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,072] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,131] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,190] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,248] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,307] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,368] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,428] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,486] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,546] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,606] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,666] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,726] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,785] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,844] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,903] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:16,963] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:17,022] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:17,457] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:25,600] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:25,621] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:25,670] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:25,721] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:25,771] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:25,820] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:25,871] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:25,922] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:25,972] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,023] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,074] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,125] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,176] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,227] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,278] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,329] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,379] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,429] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,481] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,532] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,583] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,634] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,737] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,787] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,840] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,891] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,942] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:26,994] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:27,045] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:27,096] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:27,147] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:27,200] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:27,251] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:27,303] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:27,355] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,281] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,303] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,332] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,362] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,392] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,423] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,454] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,485] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,517] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,548] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,580] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,635] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,734] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,781] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,828] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,877] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:34,975] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,025] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,075] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,124] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,174] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,224] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,274] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,325] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,374] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,425] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,474] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,523] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,573] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,623] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,673] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,723] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,772] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:35,821] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:42,909] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:42,929] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:42,981] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,035] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,091] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,147] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,202] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,259] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,317] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,375] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,432] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,492] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,552] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,666] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,722] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,780] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,838] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,897] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:43,956] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,016] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,075] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,131] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,187] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,245] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,304] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,363] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,423] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,486] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,553] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,618] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,684] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,750] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,815] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,881] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:44,948] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:02:55,494] [WARNING] [stage3.py:2497:step] 5 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
35%|███▌ | 6/17 [07:58<13:37, 74.28s/it]
|
||
|
||
{'loss': '0.3373', 'grad_norm': '1.166', 'learning_rate': '9.505e-06', 'ppl': '1.401', 'memory/max_active (GiB)': '85.12', 'memory/max_allocated (GiB)': '85.12', 'memory/device_reserved (GiB)': '92.39', 'tokens/train_per_sec_per_gpu': '146.2', 'tokens/total': 4267200, 'tokens/trainable': 1508368, 'epoch': '1'}
|
||
|
||
35%|███▌ | 6/17 [07:58<13:37, 74.28s/it][2026-04-24 01:02:57,641] [INFO] [axolotl.core.trainers.base._save:741] [PID:248539] Saving model checkpoint to /e/data1/datasets/playground/ot-baf/checkpoints/sera-v4-316-axolotl__Qwen3-8B/checkpoint-6
|
||
|
||
|
||
Writing model shards: 0%| | 0/1 [00:00<?, ?it/s][A
|
||
|
||
Writing model shards: 100%|██████████| 1/1 [00:06<00:00, 6.90s/it][A
|
||
Writing model shards: 100%|██████████| 1/1 [00:06<00:00, 6.91s/it]
|
||
[2026-04-24 01:03:25,844] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:25,880] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:25,931] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:25,983] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,034] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,087] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,141] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,197] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,251] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,305] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,360] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,417] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,473] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,528] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,584] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,639] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,694] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,750] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,805] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,861] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,917] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:26,972] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,082] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,137] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,192] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,248] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,303] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,359] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,416] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,472] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,527] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,583] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,639] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,695] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:27,751] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:35,980] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:35,998] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,024] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,050] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,077] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,262] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,318] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,381] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,444] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,510] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,575] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,640] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,836] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,902] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:36,966] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,032] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,096] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,161] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,227] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,292] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,357] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,423] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,488] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,553] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,618] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,683] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,748] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,814] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,880] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:37,946] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:38,012] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:38,082] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:38,142] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:38,208] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,377] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,395] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,411] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,428] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,444] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,461] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,482] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,547] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,599] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,649] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,720] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,834] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,885] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,941] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:48,996] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,051] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,103] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,175] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,224] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,277] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,333] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,390] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,446] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,504] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,559] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,615] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,728] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,840] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,897] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:49,968] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:50,024] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:50,080] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:50,133] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:58,898] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:58,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:58,955] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:58,997] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,039] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,083] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,130] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,174] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,216] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,259] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,303] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,348] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,392] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,668] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,710] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,767] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,823] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,882] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,939] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:03:59,997] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,055] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,112] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,170] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,228] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,288] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,347] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,406] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,465] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,523] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,582] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,642] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,701] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,761] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,819] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,877] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:00,935] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,406] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,425] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,474] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,524] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,574] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,625] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,677] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,726] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,777] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,827] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,878] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,929] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:09,981] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,032] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,083] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,135] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,185] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,237] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,288] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,390] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,442] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,493] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,544] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,598] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,650] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,706] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,762] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,866] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,919] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:10,974] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:11,028] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:11,082] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:11,137] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:11,188] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:11,241] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:18,965] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:18,984] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,043] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,106] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,169] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,233] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,295] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,586] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,643] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,768] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,831] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,892] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:19,954] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,018] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,090] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,146] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,212] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,278] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,342] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,405] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,471] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,535] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,599] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,664] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,729] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,795] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,860] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:20,991] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:21,059] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:21,124] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:21,191] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:21,257] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:21,323] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:21,386] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,118] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,156] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,174] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,193] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,212] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,231] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,250] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,269] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,288] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,307] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,340] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,371] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,402] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,430] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,459] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,490] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,520] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,550] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,581] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,612] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,642] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,673] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,735] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,765] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,796] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,826] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,857] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,887] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,918] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,949] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:31,979] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:32,015] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:32,045] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:32,076] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,388] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,407] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,454] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,504] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,555] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,607] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,659] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,710] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,761] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,812] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,864] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:36,967] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,019] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,072] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,125] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,178] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,231] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,285] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,392] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,446] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,500] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,553] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,662] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,716] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,824] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,877] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,931] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:37,984] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:38,039] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:38,093] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:38,147] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:38,201] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:46,682] [WARNING] [stage3.py:2497:step] 4 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
41%|████ | 7/17 [09:49<14:23, 86.35s/it]
|
||
|
||
{'loss': '0.3011', 'grad_norm': '0.9261', 'learning_rate': '8.909e-06', 'ppl': '1.351', 'memory/max_active (GiB)': '89.11', 'memory/max_allocated (GiB)': '89.11', 'memory/device_reserved (GiB)': '91.99', 'tokens/train_per_sec_per_gpu': '104', 'tokens/total': 4987968, 'tokens/trainable': 1768543, 'epoch': '1.174'}
|
||
|
||
41%|████ | 7/17 [09:49<14:23, 86.35s/it][2026-04-24 01:04:46,708] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:46,740] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:46,796] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:46,854] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:46,910] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:46,966] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,023] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,080] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,137] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,194] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,252] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,309] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,365] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,423] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,481] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,538] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,595] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,653] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,710] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,767] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,824] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,882] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,938] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:47,996] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,054] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,111] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,168] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,225] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,283] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,340] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,398] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,456] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,514] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,572] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,629] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:48,686] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:56,861] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:56,879] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:56,918] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:56,959] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,001] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,043] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,089] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,133] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,174] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,216] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,257] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,300] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,345] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,390] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,432] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,476] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,519] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,569] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,624] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,678] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,732] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,786] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,840] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,894] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:57,951] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,004] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,058] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,112] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,167] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,222] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,276] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,385] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,440] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,493] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:04:58,548] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,418] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,436] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,484] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,534] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,585] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,636] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,686] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,736] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,787] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,838] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,890] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,939] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:06,991] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,043] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,095] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,148] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,199] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,251] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,302] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,353] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,404] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,456] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,507] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,560] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,612] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,662] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,713] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,765] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,817] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,868] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,919] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:07,971] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:08,022] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:08,074] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:08,126] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:08,175] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:15,681] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:15,700] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:15,734] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:15,769] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:15,805] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:15,842] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:15,880] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:15,917] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:15,954] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:15,990] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,026] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,062] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,100] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,176] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,212] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,248] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,286] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,325] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,367] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,413] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,461] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,507] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,552] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,648] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,684] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,720] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,756] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,793] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,834] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,885] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:16,974] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:17,019] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:17,064] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:17,109] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,380] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,398] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,424] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,450] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,477] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,505] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,534] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,560] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,588] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,616] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,642] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,712] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,753] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,795] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,837] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,879] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,922] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:23,964] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,007] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,049] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,091] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,133] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,176] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,219] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,261] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,304] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,346] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,388] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,430] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,472] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,514] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,556] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,600] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,641] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:24,684] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,488] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,507] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,528] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,550] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,571] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,593] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,616] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,648] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,699] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,822] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:30,867] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,053] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,100] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,155] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,212] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,244] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,295] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,343] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,514] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,556] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,661] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,715] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,772] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,823] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,879] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,935] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:31,989] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:32,044] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:32,096] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:32,151] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:32,207] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:32,279] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:32,558] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:32,577] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,592] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,609] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,633] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,658] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,684] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,710] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,736] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,762] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,808] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,855] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,921] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:40,964] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,017] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,076] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,124] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,177] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,230] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,284] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,338] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,392] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,445] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,499] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,553] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,606] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,661] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,714] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,768] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,821] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,874] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,927] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:41,981] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:42,034] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:42,088] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:42,142] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:42,195] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:42,249] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:49,925] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:49,944] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:49,972] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,002] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,032] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,064] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,095] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,125] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,156] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,187] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,218] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,248] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,279] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,323] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,364] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,408] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,452] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,495] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,546] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,592] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,633] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,674] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,717] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,761] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,806] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,849] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,893] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,936] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:50,986] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:51,032] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:51,072] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:51,114] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:51,158] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:51,203] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:51,246] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:51,291] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
|
||
47%|████▋ | 8/17 [11:00<12:13, 81.49s/it]
|
||
|
||
{'loss': '0.3301', 'grad_norm': '0.7954', 'learning_rate': '8.117e-06', 'ppl': '1.391', 'memory/max_active (GiB)': '82.08', 'memory/max_allocated (GiB)': '80.73', 'memory/device_reserved (GiB)': '92.46', 'tokens/train_per_sec_per_gpu': '83.97', 'tokens/total': 5661568, 'tokens/trainable': 1995082, 'epoch': '1.348'}
|
||
|
||
47%|████▋ | 8/17 [11:00<12:13, 81.49s/it][2026-04-24 01:05:57,791] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:57,823] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:57,877] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:57,934] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:57,991] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,046] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,103] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,159] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,215] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,272] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,330] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,387] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,442] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,500] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,556] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,613] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,670] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,728] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,785] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,842] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,899] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:58,956] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,013] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,070] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,128] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,186] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,241] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,299] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,413] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,471] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,528] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,586] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,642] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,700] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:05:59,757] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,268] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,286] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,349] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,418] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,483] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,552] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,619] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,753] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,820] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,887] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:08,955] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,022] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,088] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,156] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,223] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,289] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,423] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,490] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,557] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,624] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,691] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,758] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,825] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,892] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:09,959] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:10,026] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:10,093] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:10,160] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:10,227] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:10,295] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:10,362] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:10,444] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:10,497] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:10,566] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:19,661] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:19,679] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:19,712] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:19,746] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:19,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:19,820] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:19,854] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:19,890] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:19,924] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:19,962] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,023] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,084] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,145] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,207] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,269] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,392] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,454] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,516] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,580] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,643] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,705] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,766] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,829] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,891] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:20,953] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:21,015] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:21,078] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:21,141] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:21,204] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:21,266] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:21,328] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:21,391] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:21,453] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:21,515] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:21,578] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,391] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,410] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,451] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,494] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,538] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,582] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,628] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,675] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,720] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,767] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,811] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,856] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,902] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,951] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:30,998] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,046] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,092] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,136] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,183] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,232] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,279] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,330] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,384] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,440] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,495] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,551] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,607] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,663] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,719] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,774] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,829] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,885] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,940] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:31,995] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:32,050] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:32,105] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,397] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,415] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,444] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,475] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,505] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,535] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,567] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,599] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,647] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,707] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,772] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,847] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,911] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:40,963] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,091] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,154] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,230] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,290] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,345] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,410] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,474] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,537] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,601] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,667] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,731] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,793] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,858] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,922] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:41,982] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:42,046] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:42,111] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:42,177] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:42,238] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:42,302] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:42,369] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,076] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,095] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,142] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,194] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,245] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,297] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,349] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,399] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,450] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,501] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,555] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,662] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,716] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,769] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,824] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,877] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,931] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:52,985] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,040] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,094] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,147] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,201] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,257] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,311] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,365] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,420] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,473] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,524] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,577] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,628] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,680] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,733] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,786] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,839] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:06:53,894] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,453] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,472] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,521] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,573] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,678] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,731] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,784] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,838] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,891] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:02,945] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,001] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,059] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,115] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,171] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,224] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,277] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,512] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,559] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,610] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,662] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,715] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,767] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,817] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,868] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,920] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:03,970] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:04,022] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:04,075] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:04,128] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:04,182] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:04,236] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:04,289] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:04,343] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:04,399] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:04,453] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,355] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,375] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,413] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,453] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,493] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,534] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,575] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,615] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,656] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,699] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,743] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,787] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,830] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,871] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,955] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:13,998] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,042] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,083] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,125] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,168] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,210] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,252] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,295] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,385] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,433] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,481] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,529] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,578] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,674] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,723] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,772] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,820] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:14,868] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,353] [WARNING] [stage3.py:2497:step] 7 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
53%|█████▎ | 9/17 [12:25<10:59, 82.46s/it]
|
||
|
||
{'loss': '0.2988', 'grad_norm': '0.5848', 'learning_rate': '7.169e-06', 'ppl': '1.348', 'memory/max_active (GiB)': '91.94', 'memory/max_allocated (GiB)': '91.94', 'memory/device_reserved (GiB)': '92.84', 'tokens/train_per_sec_per_gpu': '90', 'tokens/total': 6427648, 'tokens/trainable': 2262824, 'epoch': '1.522'}
|
||
|
||
53%|█████▎ | 9/17 [12:25<10:59, 82.46s/it][2026-04-24 01:07:22,377] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,409] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,463] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,518] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,574] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,629] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,742] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,797] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,852] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,907] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:22,962] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,018] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,075] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,131] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,188] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,243] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,301] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,413] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,469] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,525] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,581] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,637] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,693] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,749] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,806] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,862] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,919] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:23,975] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:24,032] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:24,088] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:24,146] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:24,203] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:24,259] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:24,316] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,124] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,143] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,194] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,248] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,302] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,357] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,414] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,468] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,523] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,578] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,634] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,692] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,750] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,809] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,868] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,927] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:33,984] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,043] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,102] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,162] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,221] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,280] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,335] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,390] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,446] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,500] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,556] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,613] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,728] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,788] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,847] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,906] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:34,967] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:35,025] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:35,083] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,134] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,151] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,200] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,252] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,304] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,409] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,459] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,511] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,563] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,615] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,666] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,718] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,821] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,875] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:44,977] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,030] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,082] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,134] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,186] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,237] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,289] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,341] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,393] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,444] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,497] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,548] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,600] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,652] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,757] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,810] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,861] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:45,913] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:52,815] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:52,833] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:52,858] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:52,883] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:52,909] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:52,934] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:52,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:52,987] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,046] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,104] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,154] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,206] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,264] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,321] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,378] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,436] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,492] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,563] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,617] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,668] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,724] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,780] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,837] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,894] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:53,952] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,009] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,066] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,189] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,241] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,299] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,413] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,471] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,527] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:07:54,585] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,231] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,250] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,299] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,351] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,403] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,456] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,511] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,566] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,622] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,675] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,726] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,779] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,835] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,892] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,944] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:03,998] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,051] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,106] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,164] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,220] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,285] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,350] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,416] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,483] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,551] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,616] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,682] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,749] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,814] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,882] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:04,951] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:05,014] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:05,080] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:05,149] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:05,212] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:05,279] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,296] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,313] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,374] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,441] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,507] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,574] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,642] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,707] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,773] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,837] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,903] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:15,970] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,035] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,099] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,165] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,231] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,296] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,360] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,425] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,490] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,555] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,619] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,683] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,750] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,815] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,879] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:16,944] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:17,010] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:17,074] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:17,139] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:17,205] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:17,271] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:17,336] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:17,402] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:17,467] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:17,531] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,392] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,411] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,436] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,462] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,488] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,514] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,541] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,566] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,597] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,647] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,703] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,751] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,803] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,860] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,907] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:27,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,014] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,068] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,120] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,173] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,280] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,327] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,392] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,452] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,497] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,550] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,655] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,708] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,815] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,868] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,922] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:28,973] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:29,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:36,749] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:36,767] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:36,829] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:36,895] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:36,960] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,092] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,157] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,223] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,503] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,524] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,586] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,651] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,715] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,780] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,846] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:37,977] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,043] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,111] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,177] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,244] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,311] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,377] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,442] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,507] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,574] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,639] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,706] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,773] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,839] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,906] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:38,973] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:39,043] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:39,108] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:39,175] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:48,574] [WARNING] [stage3.py:2497:step] 9 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
59%|█████▉ | 10/17 [13:51<09:45, 83.62s/it]
|
||
|
||
{'loss': '0.2942', 'grad_norm': '0.5016', 'learning_rate': '6.113e-06', 'ppl': '1.342', 'memory/max_active (GiB)': '91.71', 'memory/max_allocated (GiB)': '91.71', 'memory/device_reserved (GiB)': '92.84', 'tokens/train_per_sec_per_gpu': '96.17', 'tokens/total': 7202112, 'tokens/trainable': 2547440, 'epoch': '1.696'}
|
||
|
||
59%|█████▉ | 10/17 [13:51<09:45, 83.62s/it][2026-04-24 01:08:48,597] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:48,630] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:48,688] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:48,748] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:48,808] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:48,869] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:48,929] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:48,989] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,050] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,109] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,170] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,231] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,291] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,351] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,412] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,472] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,532] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,593] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,654] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,714] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,774] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,834] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,895] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:49,956] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,016] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,077] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,198] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,259] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,321] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,382] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,444] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,506] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,567] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,629] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:50,691] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,477] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,498] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,513] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,527] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,542] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,557] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,576] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,636] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,683] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,735] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,784] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,858] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,887] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,939] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:08:59,990] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,061] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,111] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,165] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,219] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,273] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,326] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,374] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,427] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,481] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,534] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,587] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,640] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,694] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,746] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,795] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,847] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,905] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:00,958] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:01,012] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:01,059] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:01,116] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:08,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:08,723] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:08,772] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:08,821] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:08,872] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:08,925] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:08,977] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,031] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,081] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,136] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,188] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,240] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,293] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,347] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,400] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,454] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,506] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,559] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,612] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,665] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,718] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,771] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,823] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,876] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,929] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:09,982] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:10,036] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:10,090] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:10,142] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:10,196] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:10,249] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:10,302] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:10,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:10,410] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:10,463] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:10,515] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,553] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,572] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,594] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,616] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,640] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,663] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,687] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,740] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,788] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,840] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,891] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:18,956] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,009] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,058] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,109] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,164] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,220] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,274] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,408] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,463] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,510] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,565] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,615] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,725] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,781] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,837] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,893] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:19,948] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:20,004] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:20,060] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:20,116] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:20,171] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:20,239] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:20,294] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,393] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,411] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,460] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,512] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,564] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,616] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,668] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,721] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,776] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,832] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,886] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,941] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:28,995] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,048] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,101] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,157] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,213] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,268] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,323] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,375] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,428] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,483] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,567] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,617] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,670] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,725] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,781] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,837] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,900] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:29,962] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:30,024] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:30,086] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:30,148] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:30,211] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:30,272] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:30,335] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,021] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,040] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,095] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,155] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,213] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,272] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,389] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,448] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,507] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,568] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,632] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,692] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,752] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,813] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,873] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,936] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:40,998] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,059] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,120] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,180] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,240] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,301] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,364] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,424] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,484] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,545] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,606] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,667] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,727] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,789] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,849] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,909] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:41,971] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:42,031] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:42,089] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,121] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,139] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,173] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,207] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,243] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,278] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,315] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,352] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,387] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,438] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,510] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,568] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,633] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,697] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,762] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,827] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,891] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:51,955] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,020] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,083] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,148] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,213] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,277] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,341] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,406] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,471] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,536] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,601] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,666] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,730] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,794] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,858] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,923] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:52,987] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:53,051] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:09:53,115] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,557] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,575] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,598] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,621] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,646] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,669] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,694] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,723] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,775] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,823] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,873] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:02,978] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,032] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,085] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,205] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,254] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,303] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,355] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,408] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,461] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,514] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,566] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,619] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:03,734] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:04,003] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:04,022] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:04,045] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:04,069] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:04,093] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:04,117] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:04,142] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:04,166] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:04,211] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,306] [WARNING] [stage3.py:2497:step] 5 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
65%|██████▍ | 11/17 [15:15<08:21, 83.65s/it]
|
||
|
||
{'loss': '0.2633', 'grad_norm': '0.4303', 'learning_rate': '5e-06', 'ppl': '1.301', 'memory/max_active (GiB)': '86.28', 'memory/max_allocated (GiB)': '86.28', 'memory/device_reserved (GiB)': '92.69', 'tokens/train_per_sec_per_gpu': '91.25', 'tokens/total': 7982208, 'tokens/trainable': 2829357, 'epoch': '1.87'}
|
||
|
||
65%|██████▍ | 11/17 [15:15<08:21, 83.65s/it][2026-04-24 01:10:12,355] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,390] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,452] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,514] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,576] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,637] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,699] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,761] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,823] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,886] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:12,948] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,010] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,072] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,134] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,197] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,260] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,322] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,385] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,447] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,510] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,572] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,635] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,697] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,760] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,822] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,885] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:13,947] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:14,009] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:14,072] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:14,134] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:14,196] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:14,260] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:14,323] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:14,386] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:14,449] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:14,511] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:23,616] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:23,635] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:23,677] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:23,724] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:23,771] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:23,816] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:23,863] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:23,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:23,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,012] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,059] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,107] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,153] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,200] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,249] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,298] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,348] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,398] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,449] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,496] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,545] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,594] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,641] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,690] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,739] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,789] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,842] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,899] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:24,953] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:25,009] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:25,064] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:25,118] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:25,174] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:25,245] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:25,295] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:25,350] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,480] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,499] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,539] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,583] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,668] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,709] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,751] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,794] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,837] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,879] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,923] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:33,966] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,008] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,050] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,093] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,135] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,178] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,223] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,265] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,308] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,350] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,392] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,435] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,478] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,521] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,563] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,607] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,649] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,691] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,734] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,776] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,820] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,863] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,907] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:34,948] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:40,779] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:40,797] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:40,828] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:40,859] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:40,892] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:40,925] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:40,958] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:40,992] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,029] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,086] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,140] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,196] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,256] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,315] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,376] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,437] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,497] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,556] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,633] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,693] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,744] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,800] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,860] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,920] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:41,981] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,040] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,101] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,162] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,238] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,295] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,352] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,408] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,468] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,530] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,590] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:42,651] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,420] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,439] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,475] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,512] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,550] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,588] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,665] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,705] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,745] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,823] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,862] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,902] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,942] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:51,982] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,022] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,063] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,104] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,144] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,184] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,224] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,264] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,304] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,344] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,384] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,425] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,467] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,506] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,546] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,586] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,666] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,706] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,746] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:52,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,415] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,437] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,455] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,472] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,488] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,504] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,517] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,570] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,628] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,684] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,739] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,794] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,850] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,905] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:58,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,017] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,072] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,127] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,183] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,238] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,293] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,348] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,403] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,459] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,514] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,569] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,624] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,680] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,735] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:10:59,791] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:00,015] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:00,069] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:00,126] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:00,161] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:00,215] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:00,267] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:08,895] [WARNING] [stage3.py:2497:step] 2 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
71%|███████ | 12/17 [16:11<06:17, 75.42s/it]
|
||
|
||
{'loss': '0.2589', 'grad_norm': '0.4857', 'learning_rate': '3.887e-06', 'ppl': '1.296', 'memory/max_active (GiB)': '87.83', 'memory/max_allocated (GiB)': '87.83', 'memory/device_reserved (GiB)': '92.14', 'tokens/train_per_sec_per_gpu': '133.5', 'tokens/total': 8532736, 'tokens/trainable': 3019949, 'epoch': '2'}
|
||
|
||
71%|███████ | 12/17 [16:11<06:17, 75.42s/it][2026-04-24 01:11:14,791] [INFO] [axolotl.core.trainers.base._save:741] [PID:248539] Saving model checkpoint to /e/data1/datasets/playground/ot-baf/checkpoints/sera-v4-316-axolotl__Qwen3-8B/checkpoint-12
|
||
|
||
|
||
Writing model shards: 0%| | 0/1 [00:00<?, ?it/s][A
|
||
|
||
Writing model shards: 100%|██████████| 1/1 [00:06<00:00, 6.96s/it][A
|
||
Writing model shards: 100%|██████████| 1/1 [00:06<00:00, 6.96s/it]
|
||
[2026-04-24 01:11:45,177] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,213] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,266] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,318] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,371] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,423] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,475] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,528] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,580] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,633] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,738] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,792] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,846] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,899] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:45,953] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,006] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,059] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,113] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,168] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,220] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,274] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,328] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,380] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,433] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,486] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,540] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,593] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,647] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,701] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,754] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,809] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,862] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,916] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:46,971] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:47,025] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:54,507] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:54,526] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:54,558] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:54,590] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:54,623] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:54,657] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:54,976] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,015] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,065] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,118] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,171] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,203] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,237] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,289] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,653] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,752] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,805] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,862] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,917] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:55,974] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,026] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,076] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,128] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,164] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,217] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,269] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,320] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,372] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,429] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,487] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,540] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,595] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,649] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,699] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:11:56,753] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:04,622] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:04,641] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:04,657] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,065] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,083] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,101] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,117] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,132] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,147] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,162] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,176] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,191] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,206] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,219] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,242] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,284] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,326] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,375] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,415] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,459] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,501] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,549] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,588] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,632] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,677] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,721] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,765] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,809] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,854] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,898] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,942] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:05,985] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:06,029] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:06,074] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:06,118] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:06,161] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,395] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,412] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,426] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,439] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,452] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,466] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,479] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,495] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,527] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,564] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,597] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,658] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,691] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,724] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,756] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,788] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,821] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,853] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,886] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,923] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,957] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:12,986] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,019] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,052] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,085] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,118] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,151] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,184] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,222] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,255] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,287] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,317] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,350] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,382] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:13,415] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:17,999] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,017] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,040] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,064] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,088] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,113] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,139] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,164] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,189] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,214] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,240] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,264] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,289] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,324] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,359] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,394] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,430] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,466] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,501] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,537] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,573] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,611] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,647] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,683] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,719] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,755] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,791] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,827] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,863] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,899] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,935] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:18,972] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:19,009] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:19,045] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:19,081] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:19,117] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,124] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,146] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,161] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,175] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,190] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,204] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,219] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,270] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,317] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,369] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,418] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,468] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,517] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,566] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,617] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,665] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,716] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,765] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,815] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,864] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,914] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:24,963] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,013] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,064] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,129] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,177] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,228] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,280] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,330] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,381] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,427] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,478] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,528] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,579] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,630] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:25,677] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:32,922] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:32,941] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:32,976] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,012] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,048] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,086] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,122] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,159] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,198] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,236] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,281] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,333] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,387] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,449] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,502] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,558] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,615] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,674] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,733] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,792] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,850] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,922] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:33,980] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,034] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,089] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,144] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,202] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,259] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,318] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,376] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,434] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,492] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,551] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,609] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,667] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:34,739] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,317] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,336] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,380] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,428] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,475] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,524] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,572] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,621] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,721] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,771] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,822] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,874] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:43,973] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,022] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,071] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,121] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,173] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,225] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,275] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,328] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,378] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,431] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,478] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,528] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,577] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,628] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,678] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,731] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,782] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,834] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,890] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:44,946] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:45,002] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:45,058] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:53,950] [WARNING] [stage3.py:2497:step] 1 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
76%|███████▋ | 13/17 [17:56<05:37, 84.40s/it]
|
||
|
||
{'loss': '0.2582', 'grad_norm': '0.466', 'learning_rate': '2.831e-06', 'ppl': '1.295', 'memory/max_active (GiB)': '78.49', 'memory/max_allocated (GiB)': '78.49', 'memory/device_reserved (GiB)': '92.24', 'tokens/train_per_sec_per_gpu': '110.3', 'tokens/total': 9151424, 'tokens/trainable': 3244601, 'epoch': '2.174'}
|
||
|
||
76%|███████▋ | 13/17 [17:56<05:37, 84.40s/it][2026-04-24 01:12:53,983] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,013] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,060] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,110] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,160] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,209] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,259] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,309] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,359] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,409] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,459] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,509] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,560] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,610] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,661] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,710] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,761] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,811] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,861] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:54,962] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,013] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,062] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,112] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,163] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,213] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,263] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,314] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,364] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,415] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,466] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,516] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,566] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,617] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,667] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:12:55,718] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:02,941] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:02,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:02,997] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,037] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,075] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,117] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,158] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,199] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,238] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,279] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,320] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,361] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,401] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,441] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,490] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,718] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,755] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,794] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,832] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,870] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,910] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:03,952] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,270] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,307] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,346] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,384] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,422] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,462] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,500] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,541] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,584] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,621] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,660] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,701] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,750] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:04,805] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:12,757] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:12,776] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:12,820] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:12,869] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:12,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:12,963] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,011] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,059] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,109] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,156] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,206] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,253] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,305] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,353] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,402] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,452] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,502] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,551] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,602] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,652] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,703] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:13,754] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,133] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,174] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,220] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,266] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,313] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,360] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,408] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,453] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,500] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,547] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,593] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,641] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,689] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:14,735] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:21,810] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:21,830] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:21,884] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:21,943] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,002] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,062] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,120] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,178] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,238] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,296] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,355] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,415] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,476] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,534] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,594] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,654] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,713] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,771] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,829] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,888] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:22,948] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,010] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,068] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,129] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,191] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,251] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,312] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,372] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,433] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,494] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,557] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,618] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,679] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,741] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,802] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:23,863] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,233] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,254] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,283] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,314] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,344] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,375] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,407] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,437] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,471] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,532] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,599] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,652] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,715] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,834] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,896] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:33,956] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,015] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,076] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,136] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,196] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,258] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,316] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,376] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,436] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,496] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,557] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,617] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,676] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,737] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,798] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,857] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,917] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:34,979] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:35,038] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:35,098] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:44,582] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:44,602] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:44,651] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:44,705] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:44,758] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:44,813] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:44,870] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:44,926] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:44,982] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,038] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,094] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,151] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,208] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,262] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,317] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,372] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,427] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,484] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,541] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,596] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,652] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,709] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,763] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,817] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,873] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,929] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:45,985] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:46,040] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:46,097] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:46,153] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:46,211] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:46,266] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:46,322] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:46,379] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:46,435] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:46,491] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,225] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,246] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,262] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,277] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,291] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,306] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,343] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,406] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,467] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,526] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,585] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,646] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,742] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,803] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,861] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,928] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:55,987] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,049] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,109] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,172] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,235] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,293] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,419] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,482] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,547] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,730] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,791] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,855] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,916] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:56,979] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:57,041] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:57,122] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:13:57,185] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:07,489] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:07,508] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:07,563] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:07,617] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:07,669] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:07,729] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:07,787] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:07,844] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:07,902] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:07,960] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,017] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,076] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,138] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,198] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,258] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,316] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,373] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,433] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,493] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,552] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,611] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,729] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,789] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,848] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,907] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:08,966] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:09,025] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:09,084] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:09,144] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:09,205] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:09,263] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:09,323] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:09,382] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:09,442] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:09,500] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:18,519] [WARNING] [stage3.py:2497:step] 5 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
82%|████████▏ | 14/17 [19:21<04:13, 84.45s/it]
|
||
|
||
{'loss': '0.2509', 'grad_norm': '0.4023', 'learning_rate': '1.883e-06', 'ppl': '1.285', 'memory/max_active (GiB)': '86.28', 'memory/max_allocated (GiB)': '86.28', 'memory/device_reserved (GiB)': '92.82', 'tokens/train_per_sec_per_gpu': '104.1', 'tokens/total': 9943488, 'tokens/trainable': 3519758, 'epoch': '2.348'}
|
||
|
||
82%|████████▏ | 14/17 [19:21<04:13, 84.45s/it][2026-04-24 01:14:18,551] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:18,585] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:18,644] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:18,703] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:18,761] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:18,819] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:18,877] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:18,937] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:18,995] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:19,052] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:19,110] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:19,582] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:19,639] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:19,698] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:19,756] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:19,812] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:19,870] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:19,928] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:19,985] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,044] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,103] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,161] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,220] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,279] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,337] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,395] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,453] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,511] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,570] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,628] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,686] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,745] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,804] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,862] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,921] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:20,980] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,140] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,161] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,196] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,233] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,270] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,309] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,348] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,385] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,422] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,459] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,496] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,539] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,594] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,651] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,709] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,766] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,824] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,882] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:29,951] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,009] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,061] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,116] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,170] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,229] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,288] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,525] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,561] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,645] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,682] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,720] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,757] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,794] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,834] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,883] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:30,952] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,616] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,636] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,659] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,682] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,707] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,731] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,757] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,781] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,817] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,869] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,924] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:39,970] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,021] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,072] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,129] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,175] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,227] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,278] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,382] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,433] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,500] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,551] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,603] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,655] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,710] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,761] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,813] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,862] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,913] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:40,965] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:41,018] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:41,070] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:41,122] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:41,173] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:41,225] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:48,700] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:48,720] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:48,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:48,825] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:48,879] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:48,932] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:48,988] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,042] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,097] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,154] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,212] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,269] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,329] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,387] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,765] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,814] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,867] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,919] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:49,973] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,081] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,136] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,193] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,246] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,300] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,356] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,411] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,467] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,522] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,579] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,636] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,692] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,747] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,802] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,856] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:14:50,909] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,240] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,261] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,308] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,357] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,407] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,457] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,508] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,557] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,607] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,656] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,706] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,755] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,808] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,858] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,909] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:00,958] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,008] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,058] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,108] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,158] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,209] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,262] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,311] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,360] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,410] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,460] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,510] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,560] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,610] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,659] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,711] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,760] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,810] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,861] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,912] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:01,960] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,089] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,108] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,129] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,151] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,172] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,194] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,216] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,283] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,408] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,483] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,540] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,605] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,670] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,749] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,803] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,867] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,932] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:09,998] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,064] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,128] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,195] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,260] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,325] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,389] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,455] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,520] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,584] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,649] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,714] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,780] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,845] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,910] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:10,975] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:11,041] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:11,106] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:20,745] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:20,765] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:20,802] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:20,841] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:20,880] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:20,921] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:20,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,001] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,041] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,079] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,122] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,186] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,264] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,321] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,381] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,445] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,509] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,586] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,649] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,702] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,767] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,834] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,897] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:21,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,025] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,090] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,154] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,218] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,282] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,347] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,412] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,475] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,540] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,603] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,667] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:22,733] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,268] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,289] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,392] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,444] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,497] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,551] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,606] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,658] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,711] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,764] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,817] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,872] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,924] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:32,977] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,030] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,085] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,139] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,194] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,248] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,301] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,355] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,408] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,461] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,514] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,567] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,621] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,675] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,730] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,784] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,840] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,892] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,946] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:33,999] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:34,054] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:34,106] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:41,963] [WARNING] [stage3.py:2497:step] 3 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
88%|████████▊ | 15/17 [20:44<02:48, 84.15s/it]
|
||
|
||
{'loss': '0.2461', 'grad_norm': '0.4038', 'learning_rate': '1.091e-06', 'ppl': '1.279', 'memory/max_active (GiB)': '82.86', 'memory/max_allocated (GiB)': '82.86', 'memory/device_reserved (GiB)': '92.75', 'tokens/train_per_sec_per_gpu': '94.41', 'tokens/total': 10704896, 'tokens/trainable': 3782803, 'epoch': '2.522'}
|
||
|
||
88%|████████▊ | 15/17 [20:44<02:48, 84.15s/it][2026-04-24 01:15:41,995] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,081] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,135] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,189] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,244] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,299] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,353] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,408] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,463] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,517] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,571] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,628] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,682] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,737] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,791] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,845] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,901] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:42,957] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,012] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,067] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,549] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,598] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,652] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,706] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,761] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,815] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,869] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,923] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:43,978] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:44,032] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:44,088] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:44,144] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:44,200] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:44,255] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:44,311] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:51,931] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:51,950] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,004] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,062] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,120] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,178] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,237] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,295] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,355] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,415] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,473] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,532] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,592] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,651] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,711] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,771] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,831] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,892] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:52,954] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,015] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,076] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,140] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,200] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,262] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,322] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,383] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,444] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,506] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,567] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,628] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,692] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,753] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,815] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,877] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:53,940] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:15:54,001] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:03,810] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:03,829] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:03,889] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:03,952] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,015] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,077] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,141] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,204] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,267] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,332] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,397] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,463] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,528] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,593] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,659] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,723] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,787] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,853] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,918] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:04,983] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,048] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,117] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,182] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,266] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,398] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,466] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,531] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,739] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,804] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,870] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:05,935] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:06,002] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:06,067] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:16,504] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:16,525] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:16,574] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:16,627] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:16,678] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:16,731] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:16,784] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:16,838] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:16,894] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:16,951] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,006] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,058] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,112] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,164] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,218] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,274] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,330] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,383] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,436] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,488] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,543] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,599] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,655] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,711] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,849] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,905] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:17,963] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:18,024] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:18,086] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:18,149] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:18,211] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:18,274] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:18,337] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:18,399] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:18,461] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,249] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,269] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,300] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,365] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,398] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,431] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,465] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,498] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,530] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,568] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,623] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,676] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,728] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,781] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,835] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,888] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,942] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:28,996] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,050] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,103] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,158] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,212] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,266] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,322] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,374] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,428] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,482] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,535] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,589] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,641] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,697] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,750] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,803] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,859] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:29,913] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,185] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,206] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,266] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,395] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,461] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,525] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,589] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,654] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,719] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,783] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,849] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,914] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:38,978] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,043] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,108] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,171] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,236] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,300] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,365] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,430] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,494] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,561] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,626] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,690] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,755] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,820] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,885] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:39,950] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:40,015] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:40,080] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:40,145] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:40,210] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:40,276] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:40,342] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:40,405] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,250] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,270] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,318] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,368] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,420] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,472] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,526] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,578] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,631] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,684] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,736] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,790] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,843] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,895] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:49,949] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,003] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,056] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,111] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,164] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,218] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,273] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,329] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,384] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,438] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,494] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,550] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,606] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,660] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,715] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,770] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,826] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,881] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,937] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:50,994] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:51,050] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:51,105] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:59,915] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:59,937] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:16:59,969] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,003] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,038] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,072] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,107] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,143] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,177] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,213] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,276] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,323] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,381] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,449] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,502] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,558] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,618] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,675] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,734] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,793] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,852] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,911] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:00,969] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,086] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,144] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,202] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,261] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,319] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,378] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,437] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,496] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,555] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,613] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,671] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:01,730] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:10,445] [WARNING] [stage3.py:2497:step] 8 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
94%|█████████▍| 16/17 [22:13<01:25, 85.45s/it]
|
||
|
||
{'loss': '0.2324', 'grad_norm': '0.3565', 'learning_rate': '4.952e-07', 'ppl': '1.262', 'memory/max_active (GiB)': '90.41', 'memory/max_allocated (GiB)': '90.41', 'memory/device_reserved (GiB)': '92.14', 'tokens/train_per_sec_per_gpu': '94.39', 'tokens/total': 11515968, 'tokens/trainable': 4077807, 'epoch': '2.696'}
|
||
|
||
94%|█████████▍| 16/17 [22:13<01:25, 85.45s/it][2026-04-24 01:17:10,478] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:10,510] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:10,562] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:10,617] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:10,673] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:10,727] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:10,781] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:10,836] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:10,892] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:10,947] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,002] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,056] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,112] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,168] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,222] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,276] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,331] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,386] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,441] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,497] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,552] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,607] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,662] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,718] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,773] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,828] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,882] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,938] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:11,994] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:12,049] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:12,104] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:12,160] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:12,216] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:12,272] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:12,327] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:12,382] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,298] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,319] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,577] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,593] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,614] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,636] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,658] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,680] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,701] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,724] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,745] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,767] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,800] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,854] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,916] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:20,980] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,041] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,101] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,163] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,224] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,287] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,347] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,409] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,467] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,532] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,593] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,654] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,714] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,777] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,837] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,899] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:21,960] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:22,023] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:22,096] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:22,146] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:22,206] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:30,935] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:30,955] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,004] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,054] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,107] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,159] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,214] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,269] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,326] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,378] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,431] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,484] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,540] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,597] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,651] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,704] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,757] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,809] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,865] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,932] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:31,997] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:32,062] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:32,129] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:32,195] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:32,261] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:32,328] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:32,394] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:32,754] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:32,816] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:32,878] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:32,943] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:33,012] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:33,080] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:33,149] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:33,215] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:33,284] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,384] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,405] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,437] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,471] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,506] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,541] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,576] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,610] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,645] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,681] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,715] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,750] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,793] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,833] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,882] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,932] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:44,988] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,035] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,092] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,142] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,189] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,235] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,283] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,333] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,383] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,432] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,482] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,531] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,581] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,642] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,692] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,737] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,785] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,834] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,883] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:45,934] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:52,948] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:52,968] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:52,997] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,027] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,058] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,088] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,120] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,151] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,202] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,264] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,319] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,373] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,432] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,493] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,553] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,614] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,675] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,736] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,795] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,856] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,917] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:53,978] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,053] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,115] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,170] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,227] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,288] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,348] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,409] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,470] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,531] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,592] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:54,652] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:55,093] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:55,118] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:17:55,146] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:03,777] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:03,798] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:03,850] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:03,907] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:03,962] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,019] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,076] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,132] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,191] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,249] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,305] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,363] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,422] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,479] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,537] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,595] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,653] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,710] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,767] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,824] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,881] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,938] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:04,996] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,053] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,110] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,167] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,224] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,282] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,339] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,396] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,454] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,512] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,569] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,627] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,685] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:05,742] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:13,679] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:13,700] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:13,725] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:13,752] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:13,778] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:13,805] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:13,832] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:13,858] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:13,892] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:13,961] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,026] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,077] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,134] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,185] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,238] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,291] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,346] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,403] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,458] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,514] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,584] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,639] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,688] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,741] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,797] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,856] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,913] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:14,967] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:15,039] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:15,102] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:15,153] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:15,210] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:15,261] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:15,315] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:15,368] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:15,425] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,405] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,425] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,450] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,475] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,501] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,528] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,554] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,581] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,608] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,635] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,674] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,718] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,759] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,800] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,844] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,887] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,930] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:24,974] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,018] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,062] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,105] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,149] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,192] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,236] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,292] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,333] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,374] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,414] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,457] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,503] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,545] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,588] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,632] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,675] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,719] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:25,763] [DEBUG] [axolotl.monkeypatch.deepspeed_utils.new_setattr:43] [PID:248539] Forwarded ds_grads_remaining to wrapped module Qwen3DecoderLayer
|
||
[2026-04-24 01:18:32,147] [WARNING] [stage3.py:2497:step] 4 pytorch allocator cache flushes since last step. this happens when there is high memory pressure and is detrimental to performance. if this is happening frequently consider adjusting settings to reduce memory consumption. If you are unable to make the cache flushes go away consider adding get_accelerator().empty_cache() calls in your training loop to ensure that all ranks flush their caches at the same time
|
||
|
||
100%|██████████| 17/17 [23:35<00:00, 84.32s/it]
|
||
|
||
{'loss': '0.2635', 'grad_norm': '0.4023', 'learning_rate': '1.254e-07', 'ppl': '1.302', 'memory/max_active (GiB)': '85.22', 'memory/max_allocated (GiB)': '85.22', 'memory/device_reserved (GiB)': '92.9', 'tokens/train_per_sec_per_gpu': '81.89', 'tokens/total': 12245888, 'tokens/trainable': 4346410, 'epoch': '2.87'}
|
||
|
||
100%|██████████| 17/17 [23:35<00:00, 84.32s/it][2026-04-24 01:18:34,458] [INFO] [axolotl.core.trainers.base._save:741] [PID:248539] Saving model checkpoint to /e/data1/datasets/playground/ot-baf/checkpoints/sera-v4-316-axolotl__Qwen3-8B/checkpoint-17
|
||
|
||
|
||
Writing model shards: 0%| | 0/1 [00:00<?, ?it/s][A
|
||
|
||
Writing model shards: 100%|██████████| 1/1 [00:07<00:00, 7.04s/it][A
|
||
Writing model shards: 100%|██████████| 1/1 [00:07<00:00, 7.04s/it]
|
||
|
||
|
||
{'train_runtime': '1443', 'train_samples_per_second': '0.377', 'train_steps_per_second': '0.012', 'train_loss': '0.3317', 'memory/max_active (GiB)': '31.77', 'memory/max_allocated (GiB)': '31.77', 'memory/device_reserved (GiB)': '88.45', 'epoch': '2.87', 'tokens/train_per_sec_per_gpu': '0'}
|
||
|
||
100%|██████████| 17/17 [24:02<00:00, 84.32s/it]
|
||
100%|██████████| 17/17 [24:02<00:00, 84.86s/it]
|
||
[2026-04-24 01:19:00,596] [INFO] [axolotl.train.save_trained_model:241] [PID:248539] Training completed! Saving trained model to /e/data1/datasets/playground/ot-baf/checkpoints/sera-v4-316-axolotl__Qwen3-8B.
|
||
[2026-04-24 01:19:02,802] [INFO] [axolotl.core.trainers.base._save:741] [PID:248539] Saving model checkpoint to /e/data1/datasets/playground/ot-baf/checkpoints/sera-v4-316-axolotl__Qwen3-8B
|
||
|
||
Writing model shards: 0%| | 0/1 [00:00<?, ?it/s]
|
||
Writing model shards: 100%|██████████| 1/1 [00:06<00:00, 6.92s/it]
|
||
Writing model shards: 100%|██████████| 1/1 [00:06<00:00, 6.92s/it]
|
||
[2026-04-24 01:19:09,933] [INFO] [axolotl.train.save_trained_model:355] [PID:248539] Model successfully saved to /e/data1/datasets/playground/ot-baf/checkpoints/sera-v4-316-axolotl__Qwen3-8B
|