Model: xd2010/Qwen1.5-MOE-aux-free-sft-math7k-1e-3-gamma-1epoch Source: Original Platform
1667 lines
49 KiB
JSON
1667 lines
49 KiB
JSON
{
|
|
"metadata": {
|
|
"total_moe_layers": 24,
|
|
"save_timestamp": "2026-03-22T20:05:51.747979",
|
|
"model_type": "Qwen2MoeForCausalLM",
|
|
"pytorch_version": "2.6.0+cu124",
|
|
"description": "Auxiliary-loss-free MoE bias states saved during training"
|
|
},
|
|
"moe_bias_states": {
|
|
"model.layers.0.mlp": {
|
|
"bias_values": [
|
|
0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004060012754052877,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.003980012144893408,
|
|
-0.004020012449473143,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004220013972371817,
|
|
-0.0013399991439655423,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.003740010317414999,
|
|
-0.0036600097082555294,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004060012754052877,
|
|
-0.004300014581531286,
|
|
-0.003980012144893408,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0019799969159066677,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.1.mlp": {
|
|
"bias_values": [
|
|
0.0005799998762086034,
|
|
-0.004220013972371817,
|
|
0.0003000000142492354,
|
|
0.003980012144893408,
|
|
-0.0034600081853568554,
|
|
-0.004300014581531286,
|
|
-0.0020599975250661373,
|
|
-0.0042600142769515514,
|
|
-0.00250000087544322,
|
|
-0.002940004225820303,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004140013363212347,
|
|
0.004300014581531286,
|
|
-0.003300006967037916,
|
|
-0.004020012449473143,
|
|
0.004300014581531286,
|
|
0.004020012449473143,
|
|
-0.0012199996272101998,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.002740002702921629,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0014999986160546541,
|
|
0.004300014581531286,
|
|
-0.00078000029316172,
|
|
-0.004220013972371817,
|
|
-0.0039000115357339382,
|
|
-0.004300014581531286,
|
|
-0.003780010621994734,
|
|
-0.002700002398341894,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
0.0003400000277906656,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004180013667792082,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.0030600051395595074,
|
|
0.004300014581531286,
|
|
-0.004220013972371817,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.2.mlp": {
|
|
"bias_values": [
|
|
0.0042600142769515514,
|
|
0.00358000909909606,
|
|
-0.004300014581531286,
|
|
-0.0022599990479648113,
|
|
-0.00033999994047917426,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0013399991439655423,
|
|
0.004300014581531286,
|
|
-0.0032200063578784466,
|
|
-0.0029800045304000378,
|
|
-0.004300014581531286,
|
|
-0.002299999352544546,
|
|
-0.003140005748718977,
|
|
-0.0042600142769515514,
|
|
-0.004140013363212347,
|
|
0.004300014581531286,
|
|
-0.004020012449473143,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
0.004140013363212347,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0008600004366599023,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004140013363212347,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0038600112311542034,
|
|
-0.003940011840313673,
|
|
-0.004300014581531286,
|
|
-0.0028200033120810986,
|
|
-0.004300014581531286,
|
|
0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.3.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004060012754052877,
|
|
-0.0036600097082555294,
|
|
-0.004300014581531286,
|
|
-0.003380007576197386,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
0.004300014581531286,
|
|
0.004060012754052877,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
0.0009400006383657455,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.003740010317414999,
|
|
0.003740010317414999,
|
|
0.004140013363212347,
|
|
-0.0042600142769515514,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.003980012144893408,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004020012449473143,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004180013667792082,
|
|
0.004300014581531286,
|
|
0.00358000909909606,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0014999984996393323,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.003700010012835264,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0013799989828839898,
|
|
-0.003940011840313673,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0024200002662837505,
|
|
-0.004300014581531286,
|
|
-0.003740010317414999,
|
|
-0.0034600081853568554
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.4.mlp": {
|
|
"bias_values": [
|
|
0.004300014581531286,
|
|
-0.004180013667792082,
|
|
0.004300014581531286,
|
|
0.0011399999493733048,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0038200109265744686,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0032200063578784466,
|
|
-0.004300014581531286,
|
|
-0.0030600051395595074,
|
|
-0.004300014581531286,
|
|
-0.0026600020937621593,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.00350000848993659,
|
|
-0.0008600004948675632,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.002540001180022955,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.003940011840313673,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0038200109265744686,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.00358000909909606,
|
|
0.003740010317414999,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.002940004225820303
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.5.mlp": {
|
|
"bias_values": [
|
|
0.004300014581531286,
|
|
-0.003380007576197386,
|
|
-0.004300014581531286,
|
|
-0.0036200094036757946,
|
|
-0.004100013058632612,
|
|
-0.004300014581531286,
|
|
0.0038600112311542034,
|
|
0.0038600112311542034,
|
|
0.0014999986160546541,
|
|
0.0034600081853568554,
|
|
-0.004300014581531286,
|
|
0.0034200078807771206,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004220013972371817,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0032200063578784466,
|
|
-0.004300014581531286,
|
|
-0.003100005444139242,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0034200078807771206,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0014999986160546541,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004060012754052877,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0014599986607208848,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0032200063578784466,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004220013972371817
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.6.mlp": {
|
|
"bias_values": [
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0038600112311542034,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.003740010317414999,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0012599994661286473,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.003980012144893408,
|
|
0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
0.0013799990992993116,
|
|
-0.004300014581531286,
|
|
-0.0029800045304000378,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.001739997649565339,
|
|
-0.004020012449473143,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004220013972371817,
|
|
0.004300014581531286,
|
|
0.0038600112311542034,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0031800060532987118,
|
|
0.004220013972371817,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0032200063578784466,
|
|
-0.0019799969159066677,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0024600005708634853,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.003540008794516325,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.7.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
0.0042600142769515514,
|
|
-0.0018199972109869123,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.003300006967037916,
|
|
-0.004300014581531286,
|
|
-0.0026200017891824245,
|
|
-0.004300014581531286,
|
|
-0.004140013363212347,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004020012449473143,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0036600097082555294,
|
|
-0.004300014581531286,
|
|
0.0009800005936995149,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004180013667792082,
|
|
-0.0036200094036757946,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.0017799974884837866,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0021799984388053417,
|
|
0.0031800060532987118,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.8.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.003300006967037916,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.003780010621994734,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.002900003921240568,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-5.9999980294378474e-05,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0023799999617040157,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0007400002796202898,
|
|
-0.002540001180022955,
|
|
0.0023799999617040157,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004220013972371817,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
0.003940011840313673,
|
|
0.003300006967037916,
|
|
0.003380007576197386,
|
|
-0.004300014581531286,
|
|
-0.003140005748718977,
|
|
-0.004300014581531286,
|
|
-0.004140013363212347,
|
|
0.0020199972204864025,
|
|
0.004180013667792082,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0026600020937621593,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0010200005490332842,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.9.mlp": {
|
|
"bias_values": [
|
|
-0.004180013667792082,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0027800030075013638,
|
|
-0.003140005748718977,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.00350000848993659,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0034600081853568554,
|
|
-0.002740002702921629,
|
|
-0.004180013667792082,
|
|
-0.004300014581531286,
|
|
0.0032200063578784466,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.003300006967037916,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.0038200109265744686,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.003980012144893408,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.003740010317414999,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.000499999790918082,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004180013667792082,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.0009800005936995149,
|
|
0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.10.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
0.0016999978106468916,
|
|
-0.004300014581531286,
|
|
-0.003300006967037916,
|
|
0.0016199980163946748,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.002139998134225607,
|
|
-0.004300014581531286,
|
|
-0.003980012144893408,
|
|
0.003740010317414999,
|
|
-0.004220013972371817,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004220013972371817,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004100013058632612,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004220013972371817,
|
|
-0.004300014581531286,
|
|
-0.004220013972371817,
|
|
0.0021799984388053417,
|
|
0.0014999986160546541,
|
|
-0.0012999993050470948,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0029800045304000378,
|
|
0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
0.0038600112311542034,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004100013058632612,
|
|
0.002740002702921629,
|
|
0.004060012754052877,
|
|
0.0028600036166608334
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.11.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004220013972371817,
|
|
-0.004300014581531286,
|
|
-0.004060012754052877,
|
|
0.004100013058632612,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
0.0015399983385577798,
|
|
0.003940011840313673,
|
|
-0.002900003921240568,
|
|
-0.004300014581531286,
|
|
-0.004140013363212347,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004180013667792082,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0036600097082555294,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
0.003140005748718977,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.0011000001104548573,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.00029999998514540493,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.003980012144893408,
|
|
0.003780010621994734,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.12.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.0011799997882917523,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0005399998626671731,
|
|
-0.0036200094036757946,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0017799973720684648,
|
|
-0.002099997829645872,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0010200004326179624,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.00013999994553159922,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.002099997829645872,
|
|
-0.004300014581531286,
|
|
-0.004020012449473143,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.002700002398341894,
|
|
0.004300014581531286,
|
|
-0.0004999999073334038,
|
|
0.004300014581531286,
|
|
-0.002900003921240568,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.003100005444139242,
|
|
-0.004300014581531286,
|
|
-0.0015399983385577798,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.13.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
0.001659997971728444,
|
|
-0.004300014581531286,
|
|
-0.0011399999493733048,
|
|
-0.0042600142769515514,
|
|
0.0027800030075013638,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0013799989828839898,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.00358000909909606,
|
|
0.002700002398341894,
|
|
-0.004300014581531286,
|
|
0.00037999998312443495,
|
|
0.0029800045304000378,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.003940011840313673,
|
|
0.004300014581531286,
|
|
0.0042600142769515514,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.00033999988227151334,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004220013972371817,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.003740010317414999,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.14.mlp": {
|
|
"bias_values": [
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.0018999968888238072,
|
|
-0.004300014581531286,
|
|
-0.004140013363212347,
|
|
0.00078000029316172,
|
|
0.0017799973720684648,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
0.0004999998491257429,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004020012449473143,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.0038600112311542034,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0036600097082555294,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004220013972371817,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
0.004220013972371817,
|
|
-0.001739997649565339,
|
|
0.004220013972371817,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004180013667792082,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.002940004225820303,
|
|
0.0038600112311542034,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.15.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004180013667792082,
|
|
-0.004300014581531286,
|
|
-0.0031800060532987118,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
-0.003780010621994734,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004100013058632612,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0021799984388053417,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004060012754052877,
|
|
-0.004300014581531286,
|
|
0.002540001180022955,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004220013972371817,
|
|
0.004220013972371817,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.0026600020937621593,
|
|
-0.003940011840313673,
|
|
-0.004180013667792082,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0017399975331500173,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0034200078807771206,
|
|
0.0003399999695830047,
|
|
0.004300014581531286,
|
|
-0.0032600066624581814,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0017399975331500173,
|
|
-0.004300014581531286,
|
|
-0.004220013972371817,
|
|
-0.0042600142769515514,
|
|
-0.0025800014846026897
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.16.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
0.0029800045304000378,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0011399999493733048,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0016999976942315698,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0023799999617040157,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.002540001180022955,
|
|
-0.004100013058632612,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0032200063578784466,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004060012754052877,
|
|
-0.0042600142769515514,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0005799999344162643,
|
|
-0.004100013058632612,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.002139998134225607,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004180013667792082,
|
|
0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.17.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0018599970499053597,
|
|
0.004300014581531286,
|
|
-0.0028200033120810986,
|
|
0.004300014581531286,
|
|
-0.002740002702921629,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.0018199972109869123,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.002099997829645872,
|
|
-0.004300014581531286,
|
|
-0.004060012754052877,
|
|
-0.004300014581531286,
|
|
-0.00350000848993659,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.0027800030075013638,
|
|
0.0034600081853568554,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0018999968888238072,
|
|
-0.004300014581531286,
|
|
-0.0010200004326179624,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.00029999998514540493,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.002740002702921629,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004220013972371817,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0024200002662837505,
|
|
0.004300014581531286,
|
|
0.003780010621994734
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.18.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0030200048349797726,
|
|
-0.004300014581531286,
|
|
-0.003140005748718977,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.003380007576197386,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.002900003921240568,
|
|
-0.004300014581531286,
|
|
0.004180013667792082,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.002339999657124281,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.00350000848993659,
|
|
-0.004300014581531286,
|
|
-0.0028600036166608334,
|
|
0.004300014581531286,
|
|
-0.0030600051395595074,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0004199998511467129,
|
|
-0.0036200094036757946,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0016199980163946748,
|
|
-0.004300014581531286,
|
|
0.00358000909909606,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004140013363212347,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0011399999493733048,
|
|
0.004300014581531286,
|
|
-0.004180013667792082,
|
|
-0.004300014581531286,
|
|
-0.0039000115357339382,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.19.mlp": {
|
|
"bias_values": [
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.00358000909909606,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.003980012144893408,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0014599986607208848,
|
|
0.004100013058632612,
|
|
-0.004300014581531286,
|
|
0.004140013363212347,
|
|
0.0014599987771362066,
|
|
-0.004300014581531286,
|
|
0.0036200094036757946,
|
|
-0.004300014581531286,
|
|
0.0022599990479648113,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0034200078807771206,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.003340007271617651,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0009800007101148367,
|
|
-0.004300014581531286,
|
|
-0.004220013972371817,
|
|
0.0020199972204864025,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.20.mlp": {
|
|
"bias_values": [
|
|
-0.004220013972371817,
|
|
0.0036600097082555294,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.00350000848993659,
|
|
-0.00358000909909606,
|
|
-0.004300014581531286,
|
|
0.003980012144893408,
|
|
0.00350000848993659,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0036200094036757946,
|
|
-0.0027800030075013638,
|
|
0.004300014581531286,
|
|
-0.0022599990479648113,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0011400000657886267,
|
|
-0.004180013667792082,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0036600097082555294,
|
|
-0.0042600142769515514,
|
|
0.002700002398341894,
|
|
-0.00037999992491677403,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0036600097082555294,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.0036200094036757946,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.21.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
0.0038200109265744686,
|
|
-0.004300014581531286,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.0028200033120810986,
|
|
-0.004300014581531286,
|
|
-0.003100005444139242,
|
|
-0.004180013667792082,
|
|
-0.0042600142769515514,
|
|
-0.0034200078807771206,
|
|
-0.003980012144893408,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0014999986160546541,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0032200063578784466,
|
|
0.004140013363212347,
|
|
-0.004060012754052877,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004180013667792082,
|
|
-0.0013799989828839898,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0030200048349797726,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.00250000087544322,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.0026200017891824245,
|
|
0.0021799984388053417,
|
|
-0.004300014581531286,
|
|
-0.004220013972371817,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004220013972371817,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.22.mlp": {
|
|
"bias_values": [
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.002299999352544546,
|
|
0.0011000001104548573,
|
|
-0.004300014581531286,
|
|
-0.003740010317414999,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.00010000001930166036,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.0009000005666166544,
|
|
-0.003980012144893408,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004020012449473143,
|
|
-0.004300014581531286,
|
|
-0.002740002702921629,
|
|
0.000780000351369381,
|
|
0.003980012144893408,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004020012449473143,
|
|
-0.004300014581531286,
|
|
0.0042600142769515514,
|
|
-0.0039000115357339382,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.00041999988025054336,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004140013363212347,
|
|
-0.004220013972371817,
|
|
0.004300014581531286,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
},
|
|
"model.layers.23.mlp": {
|
|
"bias_values": [
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.003140005748718977,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.003140005748718977,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004140013363212347,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0036200094036757946,
|
|
0.003700010012835264,
|
|
0.004100013058632612,
|
|
-0.004300014581531286,
|
|
0.003700010012835264,
|
|
-0.0038200109265744686,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004180013667792082,
|
|
-0.0034200078807771206,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0018599971663206816,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.0034600081853568554,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.002099997829645872,
|
|
-0.0042600142769515514,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
0.002540001180022955,
|
|
-0.004300014581531286,
|
|
0.0007000002660788596,
|
|
-0.004300014581531286,
|
|
0.004300014581531286,
|
|
-0.004300014581531286,
|
|
-0.004300014581531286
|
|
],
|
|
"bias_update_speed": 1e-05,
|
|
"num_experts": 60,
|
|
"module_type": "AuxFreeQwen2MoeSparseMoeBlock",
|
|
"device": "cuda:0",
|
|
"dtype": "torch.float32"
|
|
}
|
|
}
|
|
} |