Files
Qwen1.5-MOE-aux-free-sft-ma…/moe_bias_states.json
ModelHub XC 70e484f736 初始化项目,由ModelHub XC社区提供模型
Model: xd2010/Qwen1.5-MOE-aux-free-sft-math7k-1e-3-gamma-1epoch
Source: Original Platform
2026-04-12 16:30:09 +08:00

1667 lines
49 KiB
JSON

{
"metadata": {
"total_moe_layers": 24,
"save_timestamp": "2026-03-22T20:05:51.747979",
"model_type": "Qwen2MoeForCausalLM",
"pytorch_version": "2.6.0+cu124",
"description": "Auxiliary-loss-free MoE bias states saved during training"
},
"moe_bias_states": {
"model.layers.0.mlp": {
"bias_values": [
0.004300014581531286,
-0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004060012754052877,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.003980012144893408,
-0.004020012449473143,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
-0.004220013972371817,
-0.0013399991439655423,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.003740010317414999,
-0.0036600097082555294,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
0.004060012754052877,
-0.004300014581531286,
-0.003980012144893408,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.0019799969159066677,
0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.1.mlp": {
"bias_values": [
0.0005799998762086034,
-0.004220013972371817,
0.0003000000142492354,
0.003980012144893408,
-0.0034600081853568554,
-0.004300014581531286,
-0.0020599975250661373,
-0.0042600142769515514,
-0.00250000087544322,
-0.002940004225820303,
-0.004300014581531286,
-0.004300014581531286,
-0.004140013363212347,
0.004300014581531286,
-0.003300006967037916,
-0.004020012449473143,
0.004300014581531286,
0.004020012449473143,
-0.0012199996272101998,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0042600142769515514,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.002740002702921629,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0014999986160546541,
0.004300014581531286,
-0.00078000029316172,
-0.004220013972371817,
-0.0039000115357339382,
-0.004300014581531286,
-0.003780010621994734,
-0.002700002398341894,
-0.004300014581531286,
-0.0042600142769515514,
0.0003400000277906656,
0.004300014581531286,
-0.004300014581531286,
0.004180013667792082,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0042600142769515514,
-0.004300014581531286,
-0.0030600051395595074,
0.004300014581531286,
-0.004220013972371817,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.2.mlp": {
"bias_values": [
0.0042600142769515514,
0.00358000909909606,
-0.004300014581531286,
-0.0022599990479648113,
-0.00033999994047917426,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0013399991439655423,
0.004300014581531286,
-0.0032200063578784466,
-0.0029800045304000378,
-0.004300014581531286,
-0.002299999352544546,
-0.003140005748718977,
-0.0042600142769515514,
-0.004140013363212347,
0.004300014581531286,
-0.004020012449473143,
0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
-0.004300014581531286,
0.004140013363212347,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0008600004366599023,
0.004300014581531286,
0.004300014581531286,
-0.004140013363212347,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0038600112311542034,
-0.003940011840313673,
-0.004300014581531286,
-0.0028200033120810986,
-0.004300014581531286,
0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.3.mlp": {
"bias_values": [
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004060012754052877,
-0.0036600097082555294,
-0.004300014581531286,
-0.003380007576197386,
-0.004300014581531286,
-0.0042600142769515514,
0.004300014581531286,
0.004060012754052877,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.0042600142769515514,
0.0009400006383657455,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
0.003740010317414999,
0.003740010317414999,
0.004140013363212347,
-0.0042600142769515514,
0.004300014581531286,
-0.004300014581531286,
0.003980012144893408,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004020012449473143,
0.004300014581531286,
-0.004300014581531286,
0.004180013667792082,
0.004300014581531286,
0.00358000909909606,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.0014999984996393323,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.003700010012835264,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.0013799989828839898,
-0.003940011840313673,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0024200002662837505,
-0.004300014581531286,
-0.003740010317414999,
-0.0034600081853568554
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.4.mlp": {
"bias_values": [
0.004300014581531286,
-0.004180013667792082,
0.004300014581531286,
0.0011399999493733048,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0038200109265744686,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0032200063578784466,
-0.004300014581531286,
-0.0030600051395595074,
-0.004300014581531286,
-0.0026600020937621593,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.00350000848993659,
-0.0008600004948675632,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.002540001180022955,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.003940011840313673,
-0.004300014581531286,
-0.004300014581531286,
-0.0038200109265744686,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.00358000909909606,
0.003740010317414999,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.002940004225820303
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.5.mlp": {
"bias_values": [
0.004300014581531286,
-0.003380007576197386,
-0.004300014581531286,
-0.0036200094036757946,
-0.004100013058632612,
-0.004300014581531286,
0.0038600112311542034,
0.0038600112311542034,
0.0014999986160546541,
0.0034600081853568554,
-0.004300014581531286,
0.0034200078807771206,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004220013972371817,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0032200063578784466,
-0.004300014581531286,
-0.003100005444139242,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0034200078807771206,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.0014999986160546541,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004060012754052877,
-0.004300014581531286,
-0.004300014581531286,
-0.0014599986607208848,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0032200063578784466,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004220013972371817
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.6.mlp": {
"bias_values": [
0.004300014581531286,
0.004300014581531286,
-0.0038600112311542034,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
0.003740010317414999,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0012599994661286473,
-0.004300014581531286,
-0.004300014581531286,
-0.003980012144893408,
0.004300014581531286,
-0.0042600142769515514,
0.0013799990992993116,
-0.004300014581531286,
-0.0029800045304000378,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.001739997649565339,
-0.004020012449473143,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004220013972371817,
0.004300014581531286,
0.0038600112311542034,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
0.0031800060532987118,
0.004220013972371817,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0032200063578784466,
-0.0019799969159066677,
0.004300014581531286,
-0.004300014581531286,
-0.0024600005708634853,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.003540008794516325,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.7.mlp": {
"bias_values": [
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
0.0042600142769515514,
-0.0018199972109869123,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.003300006967037916,
-0.004300014581531286,
-0.0026200017891824245,
-0.004300014581531286,
-0.004140013363212347,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004020012449473143,
-0.004300014581531286,
-0.004300014581531286,
-0.0036600097082555294,
-0.004300014581531286,
0.0009800005936995149,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
-0.004180013667792082,
-0.0036200094036757946,
0.004300014581531286,
0.004300014581531286,
0.0017799974884837866,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0021799984388053417,
0.0031800060532987118,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.8.mlp": {
"bias_values": [
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.003300006967037916,
0.004300014581531286,
-0.004300014581531286,
0.003780010621994734,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.002900003921240568,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-5.9999980294378474e-05,
0.004300014581531286,
-0.004300014581531286,
0.0023799999617040157,
-0.004300014581531286,
-0.004300014581531286,
-0.0007400002796202898,
-0.002540001180022955,
0.0023799999617040157,
-0.004300014581531286,
0.004300014581531286,
-0.004220013972371817,
-0.004300014581531286,
-0.004300014581531286,
0.0042600142769515514,
0.003940011840313673,
0.003300006967037916,
0.003380007576197386,
-0.004300014581531286,
-0.003140005748718977,
-0.004300014581531286,
-0.004140013363212347,
0.0020199972204864025,
0.004180013667792082,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.0026600020937621593,
0.004300014581531286,
-0.004300014581531286,
0.0010200005490332842,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.9.mlp": {
"bias_values": [
-0.004180013667792082,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0027800030075013638,
-0.003140005748718977,
-0.004300014581531286,
-0.004300014581531286,
-0.00350000848993659,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0034600081853568554,
-0.002740002702921629,
-0.004180013667792082,
-0.004300014581531286,
0.0032200063578784466,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.003300006967037916,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
0.0038200109265744686,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.003980012144893408,
-0.004300014581531286,
-0.004300014581531286,
-0.003740010317414999,
-0.004300014581531286,
0.004300014581531286,
-0.000499999790918082,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
-0.004180013667792082,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.0009800005936995149,
0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.10.mlp": {
"bias_values": [
-0.004300014581531286,
0.0016999978106468916,
-0.004300014581531286,
-0.003300006967037916,
0.0016199980163946748,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.002139998134225607,
-0.004300014581531286,
-0.003980012144893408,
0.003740010317414999,
-0.004220013972371817,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004220013972371817,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004100013058632612,
-0.004300014581531286,
-0.004300014581531286,
0.004220013972371817,
-0.004300014581531286,
-0.004220013972371817,
0.0021799984388053417,
0.0014999986160546541,
-0.0012999993050470948,
-0.004300014581531286,
0.004300014581531286,
-0.0029800045304000378,
0.004300014581531286,
-0.0042600142769515514,
0.0038600112311542034,
-0.004300014581531286,
-0.0042600142769515514,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004100013058632612,
0.002740002702921629,
0.004060012754052877,
0.0028600036166608334
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.11.mlp": {
"bias_values": [
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004220013972371817,
-0.004300014581531286,
-0.004060012754052877,
0.004100013058632612,
-0.004300014581531286,
0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.0042600142769515514,
0.0015399983385577798,
0.003940011840313673,
-0.002900003921240568,
-0.004300014581531286,
-0.004140013363212347,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004180013667792082,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0036600097082555294,
-0.004300014581531286,
-0.0042600142769515514,
0.003140005748718977,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.0011000001104548573,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.00029999998514540493,
-0.004300014581531286,
-0.004300014581531286,
-0.003980012144893408,
0.003780010621994734,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.12.mlp": {
"bias_values": [
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
-0.0011799997882917523,
-0.004300014581531286,
-0.004300014581531286,
0.0005399998626671731,
-0.0036200094036757946,
-0.004300014581531286,
0.004300014581531286,
-0.0017799973720684648,
-0.002099997829645872,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0010200004326179624,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.00013999994553159922,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.002099997829645872,
-0.004300014581531286,
-0.004020012449473143,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.002700002398341894,
0.004300014581531286,
-0.0004999999073334038,
0.004300014581531286,
-0.002900003921240568,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.003100005444139242,
-0.004300014581531286,
-0.0015399983385577798,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.13.mlp": {
"bias_values": [
-0.004300014581531286,
0.001659997971728444,
-0.004300014581531286,
-0.0011399999493733048,
-0.0042600142769515514,
0.0027800030075013638,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.0013799989828839898,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.00358000909909606,
0.002700002398341894,
-0.004300014581531286,
0.00037999998312443495,
0.0029800045304000378,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.003940011840313673,
0.004300014581531286,
0.0042600142769515514,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.00033999988227151334,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004220013972371817,
0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.0042600142769515514,
-0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
-0.003740010317414999,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.14.mlp": {
"bias_values": [
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
0.0018999968888238072,
-0.004300014581531286,
-0.004140013363212347,
0.00078000029316172,
0.0017799973720684648,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
0.0004999998491257429,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004020012449473143,
-0.004300014581531286,
-0.0042600142769515514,
-0.004300014581531286,
-0.0038600112311542034,
-0.004300014581531286,
-0.004300014581531286,
0.0036600097082555294,
-0.0042600142769515514,
-0.004300014581531286,
-0.004220013972371817,
0.004300014581531286,
-0.004300014581531286,
0.0042600142769515514,
0.004220013972371817,
-0.001739997649565339,
0.004220013972371817,
-0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
0.004180013667792082,
-0.004300014581531286,
0.004300014581531286,
0.002940004225820303,
0.0038600112311542034,
-0.004300014581531286,
0.004300014581531286,
-0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.15.mlp": {
"bias_values": [
-0.004300014581531286,
-0.004300014581531286,
0.004180013667792082,
-0.004300014581531286,
-0.0031800060532987118,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0042600142769515514,
-0.003780010621994734,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004100013058632612,
0.004300014581531286,
-0.004300014581531286,
0.0021799984388053417,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004060012754052877,
-0.004300014581531286,
0.002540001180022955,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004220013972371817,
0.004220013972371817,
0.004300014581531286,
0.004300014581531286,
0.0026600020937621593,
-0.003940011840313673,
-0.004180013667792082,
0.004300014581531286,
-0.004300014581531286,
-0.0017399975331500173,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0034200078807771206,
0.0003399999695830047,
0.004300014581531286,
-0.0032600066624581814,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.0017399975331500173,
-0.004300014581531286,
-0.004220013972371817,
-0.0042600142769515514,
-0.0025800014846026897
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.16.mlp": {
"bias_values": [
-0.004300014581531286,
0.0029800045304000378,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.0011399999493733048,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.0016999976942315698,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0023799999617040157,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.002540001180022955,
-0.004100013058632612,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0042600142769515514,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0032200063578784466,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004060012754052877,
-0.0042600142769515514,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0005799999344162643,
-0.004100013058632612,
-0.004300014581531286,
0.004300014581531286,
0.002139998134225607,
0.004300014581531286,
-0.004300014581531286,
0.004180013667792082,
0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.17.mlp": {
"bias_values": [
-0.004300014581531286,
-0.004300014581531286,
0.0018599970499053597,
0.004300014581531286,
-0.0028200033120810986,
0.004300014581531286,
-0.002740002702921629,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0042600142769515514,
-0.0018199972109869123,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.002099997829645872,
-0.004300014581531286,
-0.004060012754052877,
-0.004300014581531286,
-0.00350000848993659,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.0027800030075013638,
0.0034600081853568554,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0018999968888238072,
-0.004300014581531286,
-0.0010200004326179624,
-0.004300014581531286,
0.004300014581531286,
0.00029999998514540493,
-0.004300014581531286,
-0.004300014581531286,
-0.002740002702921629,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004220013972371817,
-0.004300014581531286,
0.0042600142769515514,
0.004300014581531286,
-0.004300014581531286,
0.0024200002662837505,
0.004300014581531286,
0.003780010621994734
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.18.mlp": {
"bias_values": [
-0.004300014581531286,
-0.004300014581531286,
-0.0030200048349797726,
-0.004300014581531286,
-0.003140005748718977,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.003380007576197386,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.002900003921240568,
-0.004300014581531286,
0.004180013667792082,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.002339999657124281,
0.004300014581531286,
-0.004300014581531286,
-0.00350000848993659,
-0.004300014581531286,
-0.0028600036166608334,
0.004300014581531286,
-0.0030600051395595074,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.0004199998511467129,
-0.0036200094036757946,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0016199980163946748,
-0.004300014581531286,
0.00358000909909606,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004140013363212347,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.0011399999493733048,
0.004300014581531286,
-0.004180013667792082,
-0.004300014581531286,
-0.0039000115357339382,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.19.mlp": {
"bias_values": [
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.00358000909909606,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.003980012144893408,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0014599986607208848,
0.004100013058632612,
-0.004300014581531286,
0.004140013363212347,
0.0014599987771362066,
-0.004300014581531286,
0.0036200094036757946,
-0.004300014581531286,
0.0022599990479648113,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.0034200078807771206,
-0.004300014581531286,
0.004300014581531286,
0.003340007271617651,
-0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0042600142769515514,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0009800007101148367,
-0.004300014581531286,
-0.004220013972371817,
0.0020199972204864025,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.20.mlp": {
"bias_values": [
-0.004220013972371817,
0.0036600097082555294,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.00350000848993659,
-0.00358000909909606,
-0.004300014581531286,
0.003980012144893408,
0.00350000848993659,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.0036200094036757946,
-0.0027800030075013638,
0.004300014581531286,
-0.0022599990479648113,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0011400000657886267,
-0.004180013667792082,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0036600097082555294,
-0.0042600142769515514,
0.002700002398341894,
-0.00037999992491677403,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.0042600142769515514,
-0.004300014581531286,
0.004300014581531286,
-0.0036600097082555294,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
0.0036200094036757946,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.21.mlp": {
"bias_values": [
-0.004300014581531286,
0.0038200109265744686,
-0.004300014581531286,
-0.0042600142769515514,
-0.004300014581531286,
0.004300014581531286,
0.0028200033120810986,
-0.004300014581531286,
-0.003100005444139242,
-0.004180013667792082,
-0.0042600142769515514,
-0.0034200078807771206,
-0.003980012144893408,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.0014999986160546541,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0032200063578784466,
0.004140013363212347,
-0.004060012754052877,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004180013667792082,
-0.0013799989828839898,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.0030200048349797726,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.00250000087544322,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.0026200017891824245,
0.0021799984388053417,
-0.004300014581531286,
-0.004220013972371817,
-0.004300014581531286,
-0.004300014581531286,
-0.004220013972371817,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.22.mlp": {
"bias_values": [
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.002299999352544546,
0.0011000001104548573,
-0.004300014581531286,
-0.003740010317414999,
0.004300014581531286,
0.004300014581531286,
0.00010000001930166036,
-0.004300014581531286,
-0.004300014581531286,
0.0009000005666166544,
-0.003980012144893408,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004020012449473143,
-0.004300014581531286,
-0.002740002702921629,
0.000780000351369381,
0.003980012144893408,
-0.004300014581531286,
0.004300014581531286,
-0.004020012449473143,
-0.004300014581531286,
0.0042600142769515514,
-0.0039000115357339382,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.00041999988025054336,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004140013363212347,
-0.004220013972371817,
0.004300014581531286,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
},
"model.layers.23.mlp": {
"bias_values": [
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.003140005748718977,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.003140005748718977,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
0.004140013363212347,
-0.004300014581531286,
-0.004300014581531286,
-0.0036200094036757946,
0.003700010012835264,
0.004100013058632612,
-0.004300014581531286,
0.003700010012835264,
-0.0038200109265744686,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004180013667792082,
-0.0034200078807771206,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286,
-0.0018599971663206816,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.0034600081853568554,
-0.004300014581531286,
0.004300014581531286,
-0.002099997829645872,
-0.0042600142769515514,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
0.002540001180022955,
-0.004300014581531286,
0.0007000002660788596,
-0.004300014581531286,
0.004300014581531286,
-0.004300014581531286,
-0.004300014581531286
],
"bias_update_speed": 1e-05,
"num_experts": 60,
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
"device": "cuda:0",
"dtype": "torch.float32"
}
}
}