1667 lines
46 KiB
JSON
1667 lines
46 KiB
JSON
{
|
|
"metadata": {
|
|
"total_moe_layers": 24,
|
|
"save_timestamp": "2026-03-22T21:22:45.786159",
|
|
"model_type": "Qwen2MoeForCausalLM",
|
|
"pytorch_version": "2.6.0+cu124",
|
|
"description": "Auxiliary-loss-free MoE bias states saved during training"
|
|
},
|
|
"moe_bias_states": {
|
|
"model.layers.0.mlp": {
|
|
"bias_values": [
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4059983193874359,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.3499990403652191,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.3519990146160126,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.1.mlp": {
|
|
"bias_values": [
|
|
0.4059983193874359,
|
|
-0.4299980103969574,
|
|
-0.39399847388267517,
|
|
0.4299980103969574,
|
|
-0.4099982678890228,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.07199996709823608,
|
|
-0.4059983193874359,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.42199811339378357,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3219994008541107,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4059983193874359,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.42199811339378357,
|
|
-0.4299980103969574,
|
|
-0.41799816489219666,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4059983193874359,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.2.mlp": {
|
|
"bias_values": [
|
|
0.4299980103969574,
|
|
0.4099982678890228,
|
|
-0.4299980103969574,
|
|
-0.401998370885849,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.39399847388267517,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.36199888586997986,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.3.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.401998370885849,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.42199811339378357,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.38999852538108826,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.42199811339378357,
|
|
0.41399821639060974,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.262000173330307,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.3459990918636322,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.42199811339378357
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.4.mlp": {
|
|
"bias_values": [
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.3779986798763275,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41799816489219666,
|
|
-0.4299980103969574,
|
|
-0.4059983193874359,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.38999852538108826,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.401998370885849
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.5.mlp": {
|
|
"bias_values": [
|
|
0.4299980103969574,
|
|
-0.4059983193874359,
|
|
-0.4299980103969574,
|
|
-0.42199811339378357,
|
|
-0.41399821639060974,
|
|
-0.4299980103969574,
|
|
0.41799816489219666,
|
|
0.42199811339378357,
|
|
-0.41399821639060974,
|
|
0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4059983193874359,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4259980618953705,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.42199811339378357
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.6.mlp": {
|
|
"bias_values": [
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.38599857687950134,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41799816489219666,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.24000030755996704,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.42199811339378357,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4259980618953705,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.42199811339378357,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41799816489219666,
|
|
-0.4059983193874359,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.41399821639060974,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.7.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.401998370885849,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.3979984223842621,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.22600027918815613,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3979984223842621,
|
|
0.401998370885849,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.8.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4259980618953705,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41799816489219666,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41399821639060974,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.41799816489219666,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.42199811339378357,
|
|
-0.42199811339378357,
|
|
0.3739987313747406,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.38599857687950134,
|
|
0.4299980103969574,
|
|
0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4059983193874359,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41399821639060974,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.9.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3779986798763275,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.2899998128414154,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4099982678890228,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.401998370885849,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.3419991433620453,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.10.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
0.18200019001960754,
|
|
-0.4299980103969574,
|
|
-0.401998370885849,
|
|
-0.3659988343715668,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.39399847388267517,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
0.38999852538108826,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.36199888586997986,
|
|
0.30599960684776306,
|
|
-0.4099982678890228,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4259980618953705,
|
|
0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3739987313747406,
|
|
0.4299980103969574,
|
|
0.38199862837791443
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.11.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.42199811339378357,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3779986798763275,
|
|
0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.42199811339378357,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41399821639060974,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3699987828731537,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.30599960684776306,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41799816489219666,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.12.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.36199888586997986,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.38999852538108826,
|
|
-0.42199811339378357,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3779986798763275,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4059983193874359,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.3419991433620453,
|
|
0.4299980103969574,
|
|
0.4099982678890228,
|
|
0.4299980103969574,
|
|
-0.3699987828731537,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.42199811339378357,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.13.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
0.2540002763271332,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.42199811339378357,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3379991948604584,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4059983193874359,
|
|
0.4099982678890228,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.36199888586997986,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.38599857687950134,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.14.mlp": {
|
|
"bias_values": [
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.3699987828731537,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.32999929785728455,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.1800001859664917,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4259980618953705,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.29799970984458923,
|
|
0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.39399847388267517,
|
|
0.41799816489219666,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.15.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4099982678890228,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.2899998128414154,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4099982678890228,
|
|
-0.4099982678890228,
|
|
-0.4259980618953705,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.38599857687950134,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4259980618953705,
|
|
-0.4099982678890228,
|
|
0.4299980103969574,
|
|
-0.4059983193874359,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.42199811339378357,
|
|
-0.4299980103969574,
|
|
-0.38999852538108826
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.16.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
0.4099982678890228,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.18600019812583923,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.36199888586997986,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.32599934935569763,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4259980618953705,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.38599857687950134,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.3979984223842621,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.42199811339378357,
|
|
0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.17.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.39399847388267517,
|
|
0.4299980103969574,
|
|
-0.41399821639060974,
|
|
0.4299980103969574,
|
|
-0.41799816489219666,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.401998370885849,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.3179994523525238,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.21400025486946106,
|
|
0.38599857687950134,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3779986798763275,
|
|
-0.4299980103969574,
|
|
-0.3659988343715668,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.33399924635887146,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4099982678890228,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3659988343715668,
|
|
0.4299980103969574,
|
|
0.4259980618953705
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.18.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.39399847388267517,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4119982421398163,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.42199811339378357,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.34399911761283875,
|
|
0.4299980103969574,
|
|
-0.41799816489219666,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.42199811339378357,
|
|
-0.3779986798763275,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.35799893736839294,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.35399898886680603,
|
|
0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.41799816489219666,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.19.mlp": {
|
|
"bias_values": [
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3219994008541107,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4059983193874359,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.42199811339378357,
|
|
-0.36199888586997986,
|
|
-0.4299980103969574,
|
|
0.41399821639060974,
|
|
-0.4299980103969574,
|
|
0.2340002954006195,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.42199811339378357,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41399821639060974,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.42199811339378357,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.20.mlp": {
|
|
"bias_values": [
|
|
-0.4259980618953705,
|
|
0.42199811339378357,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.3659988343715668,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.35799893736839294,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.35399898886680603,
|
|
-0.42199811339378357,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3459990918636322,
|
|
-0.4299980103969574,
|
|
-0.4199981391429901,
|
|
0.38999852538108826,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.38199862837791443,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.21.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
0.4099982678890228,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.2859998643398285,
|
|
-0.4299980103969574,
|
|
-0.32999929785728455,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.35399898886680603,
|
|
-0.41399821639060974,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3979984223842621,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4259980618953705,
|
|
-0.4099982678890228,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4099982678890228,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3459990918636322,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.3379991948604584,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.22.mlp": {
|
|
"bias_values": [
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4099982678890228,
|
|
-0.2759999930858612,
|
|
-0.4299980103969574,
|
|
-0.401998370885849,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.36199888586997986,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.3079995810985565,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4099982678890228,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41799816489219666,
|
|
0.4099982678890228,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.39399847388267517,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.23.mlp": {
|
|
"bias_values": [
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.33399924635887146,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.32599934935569763,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41799816489219666,
|
|
0.4299980103969574,
|
|
0.4259980618953705,
|
|
-0.4299980103969574,
|
|
0.04199998453259468,
|
|
-0.42199811339378357,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.401998370885849,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.41799816489219666,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.42199811339378357,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4099982678890228,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.3779986798763275,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574,
|
|
0.4299980103969574,
|
|
-0.4299980103969574,
|
|
-0.4299980103969574
|
|
],
|
|
"bias_update_speed": 0.001,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
}
|
|
}
|
|
} |