Files
Qwen1.5-MOE-aux-free-sft-ma…/moe_bias_states.json
ModelHub XC 9d78c2114f 初始化项目,由ModelHub XC社区提供模型
Model: xd2010/Qwen1.5-MOE-aux-free-sft-math7k-1e-3-gamma-1epo
Source: Original Platform
2026-06-24 12:44:55 +08:00

1667 lines
46 KiB
JSON

{
"metadata": {
"total_moe_layers": 24,
"save_timestamp": "2026-03-22T21:22:45.786159",
"model_type": "Qwen2MoeForCausalLM",
"pytorch_version": "2.6.0+cu124",
"description": "Auxiliary-loss-free MoE bias states saved during training"
},
"moe_bias_states": {
"model.layers.0.mlp": {
"bias_values": [
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4059983193874359,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
0.3499990403652191,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.3519990146160126,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.1.mlp": {
"bias_values": [
0.4059983193874359,
-0.4299980103969574,
-0.39399847388267517,
0.4299980103969574,
-0.4099982678890228,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.07199996709823608,
-0.4059983193874359,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.42199811339378357,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.3219994008541107,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4059983193874359,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.42199811339378357,
-0.4299980103969574,
-0.41799816489219666,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4059983193874359,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.2.mlp": {
"bias_values": [
0.4299980103969574,
0.4099982678890228,
-0.4299980103969574,
-0.401998370885849,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.39399847388267517,
-0.4259980618953705,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.36199888586997986,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.3.mlp": {
"bias_values": [
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.401998370885849,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.42199811339378357,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.38999852538108826,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.42199811339378357,
0.41399821639060974,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.262000173330307,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.3459990918636322,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.42199811339378357
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.4.mlp": {
"bias_values": [
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.3779986798763275,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.41799816489219666,
-0.4299980103969574,
-0.4059983193874359,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.38999852538108826,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.401998370885849
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.5.mlp": {
"bias_values": [
0.4299980103969574,
-0.4059983193874359,
-0.4299980103969574,
-0.42199811339378357,
-0.41399821639060974,
-0.4299980103969574,
0.41799816489219666,
0.42199811339378357,
-0.41399821639060974,
0.4259980618953705,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4059983193874359,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4259980618953705,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.42199811339378357
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.6.mlp": {
"bias_values": [
0.4299980103969574,
0.4299980103969574,
-0.38599857687950134,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.41799816489219666,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.24000030755996704,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.42199811339378357,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4259980618953705,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.42199811339378357,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.41799816489219666,
-0.4059983193874359,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.41399821639060974,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.7.mlp": {
"bias_values": [
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4259980618953705,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.401998370885849,
-0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.3979984223842621,
0.4299980103969574,
0.4299980103969574,
0.22600027918815613,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.3979984223842621,
0.401998370885849,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.8.mlp": {
"bias_values": [
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4259980618953705,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.41799816489219666,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.41399821639060974,
0.4299980103969574,
-0.4299980103969574,
0.41799816489219666,
-0.4299980103969574,
-0.4299980103969574,
0.42199811339378357,
-0.42199811339378357,
0.3739987313747406,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.38599857687950134,
0.4299980103969574,
0.4259980618953705,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4059983193874359,
0.4299980103969574,
-0.4299980103969574,
-0.41399821639060974,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.9.mlp": {
"bias_values": [
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.3779986798763275,
-0.4259980618953705,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.2899998128414154,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4099982678890228,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.401998370885849,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.3419991433620453,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.10.mlp": {
"bias_values": [
-0.4299980103969574,
0.18200019001960754,
-0.4299980103969574,
-0.401998370885849,
-0.3659988343715668,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.39399847388267517,
-0.4299980103969574,
-0.4259980618953705,
0.38999852538108826,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.36199888586997986,
0.30599960684776306,
-0.4099982678890228,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4259980618953705,
0.4259980618953705,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.3739987313747406,
0.4299980103969574,
0.38199862837791443
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.11.mlp": {
"bias_values": [
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.42199811339378357,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.3779986798763275,
0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
-0.42199811339378357,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.41399821639060974,
-0.4299980103969574,
-0.4299980103969574,
0.3699987828731537,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.30599960684776306,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.41799816489219666,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.12.mlp": {
"bias_values": [
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
-0.4299980103969574,
0.36199888586997986,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.38999852538108826,
-0.42199811339378357,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.3779986798763275,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4059983193874359,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.3419991433620453,
0.4299980103969574,
0.4099982678890228,
0.4299980103969574,
-0.3699987828731537,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.42199811339378357,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.13.mlp": {
"bias_values": [
-0.4299980103969574,
0.2540002763271332,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.42199811339378357,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.3379991948604584,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4059983193874359,
0.4099982678890228,
-0.4299980103969574,
-0.4299980103969574,
0.36199888586997986,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.38599857687950134,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.14.mlp": {
"bias_values": [
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.3699987828731537,
-0.4299980103969574,
-0.4299980103969574,
-0.32999929785728455,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
-0.1800001859664917,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4259980618953705,
-0.4259980618953705,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.29799970984458923,
0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.39399847388267517,
0.41799816489219666,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.15.mlp": {
"bias_values": [
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4099982678890228,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.2899998128414154,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4099982678890228,
-0.4099982678890228,
-0.4259980618953705,
0.4299980103969574,
-0.4299980103969574,
-0.38599857687950134,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4259980618953705,
-0.4099982678890228,
0.4299980103969574,
-0.4059983193874359,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.42199811339378357,
-0.4299980103969574,
-0.38999852538108826
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.16.mlp": {
"bias_values": [
-0.4299980103969574,
0.4099982678890228,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.18600019812583923,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.36199888586997986,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.32599934935569763,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4259980618953705,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.38599857687950134,
-0.4259980618953705,
-0.4299980103969574,
0.4299980103969574,
0.3979984223842621,
0.4299980103969574,
-0.4299980103969574,
0.42199811339378357,
0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.17.mlp": {
"bias_values": [
-0.4299980103969574,
-0.4299980103969574,
-0.39399847388267517,
0.4299980103969574,
-0.41399821639060974,
0.4299980103969574,
-0.41799816489219666,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.401998370885849,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
-0.3179994523525238,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.21400025486946106,
0.38599857687950134,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.3779986798763275,
-0.4299980103969574,
-0.3659988343715668,
-0.4299980103969574,
0.4299980103969574,
0.33399924635887146,
-0.4299980103969574,
-0.4299980103969574,
-0.4099982678890228,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.3659988343715668,
0.4299980103969574,
0.4259980618953705
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.18.mlp": {
"bias_values": [
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.39399847388267517,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4119982421398163,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.42199811339378357,
0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
-0.34399911761283875,
0.4299980103969574,
-0.41799816489219666,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.42199811339378357,
-0.3779986798763275,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
0.35799893736839294,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.35399898886680603,
0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
-0.41799816489219666,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.19.mlp": {
"bias_values": [
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.3219994008541107,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4059983193874359,
0.4299980103969574,
-0.4299980103969574,
0.42199811339378357,
-0.36199888586997986,
-0.4299980103969574,
0.41399821639060974,
-0.4299980103969574,
0.2340002954006195,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.42199811339378357,
-0.4259980618953705,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.41399821639060974,
-0.4299980103969574,
-0.4299980103969574,
0.42199811339378357,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.20.mlp": {
"bias_values": [
-0.4259980618953705,
0.42199811339378357,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.3659988343715668,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4259980618953705,
-0.4299980103969574,
-0.4259980618953705,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.35799893736839294,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.35399898886680603,
-0.42199811339378357,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.3459990918636322,
-0.4299980103969574,
-0.4199981391429901,
0.38999852538108826,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4259980618953705,
-0.4299980103969574,
0.4299980103969574,
-0.38199862837791443,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.21.mlp": {
"bias_values": [
-0.4299980103969574,
0.4099982678890228,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.2859998643398285,
-0.4299980103969574,
-0.32999929785728455,
-0.4299980103969574,
-0.4299980103969574,
0.35399898886680603,
-0.41399821639060974,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.3979984223842621,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4259980618953705,
-0.4099982678890228,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4099982678890228,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.3459990918636322,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.3379991948604584,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.22.mlp": {
"bias_values": [
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4099982678890228,
-0.2759999930858612,
-0.4299980103969574,
-0.401998370885849,
0.4299980103969574,
0.4299980103969574,
-0.36199888586997986,
-0.4299980103969574,
-0.4299980103969574,
-0.3079995810985565,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4099982678890228,
-0.4299980103969574,
-0.4299980103969574,
-0.41799816489219666,
0.4099982678890228,
-0.4299980103969574,
0.4299980103969574,
-0.4259980618953705,
-0.4299980103969574,
0.4259980618953705,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.39399847388267517,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.23.mlp": {
"bias_values": [
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.33399924635887146,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.32599934935569763,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.41799816489219666,
0.4299980103969574,
0.4259980618953705,
-0.4299980103969574,
0.04199998453259468,
-0.42199811339378357,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.401998370885849,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
-0.41799816489219666,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.42199811339378357,
-0.4299980103969574,
0.4299980103969574,
-0.4099982678890228,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
0.3779986798763275,
-0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574,
0.4299980103969574,
-0.4299980103969574,
-0.4299980103969574
],
"bias_update_speed": 0.001,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
}
}
}