{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.02127699185889099, "eval_steps": 0, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7730826549075824e-06, "grad_norm": 25.625, "learning_rate": 0.0, "loss": 3.8026, "step": 1 }, { "epoch": 3.546165309815165e-06, "grad_norm": 26.5, "learning_rate": 2e-06, "loss": 4.5357, "step": 2 }, { "epoch": 7.09233061963033e-06, "grad_norm": 21.875, "learning_rate": 6e-06, "loss": 4.3725, "step": 4 }, { "epoch": 1.0638495929445495e-05, "grad_norm": 23.5, "learning_rate": 1e-05, "loss": 4.4881, "step": 6 }, { "epoch": 1.418466123926066e-05, "grad_norm": 18.875, "learning_rate": 1.4e-05, "loss": 4.6479, "step": 8 }, { "epoch": 1.7730826549075824e-05, "grad_norm": 18.375, "learning_rate": 1.8e-05, "loss": 4.6278, "step": 10 }, { "epoch": 2.127699185889099e-05, "grad_norm": 17.75, "learning_rate": 2.2e-05, "loss": 4.3834, "step": 12 }, { "epoch": 2.4823157168706156e-05, "grad_norm": 26.75, "learning_rate": 2.6e-05, "loss": 4.3062, "step": 14 }, { "epoch": 2.836932247852132e-05, "grad_norm": 19.625, "learning_rate": 3e-05, "loss": 4.1917, "step": 16 }, { "epoch": 3.1915487788336485e-05, "grad_norm": 20.625, "learning_rate": 3.4000000000000007e-05, "loss": 3.8393, "step": 18 }, { "epoch": 3.546165309815165e-05, "grad_norm": 16.75, "learning_rate": 3.8e-05, "loss": 4.0825, "step": 20 }, { "epoch": 3.900781840796682e-05, "grad_norm": 16.375, "learning_rate": 4.2000000000000004e-05, "loss": 3.4053, "step": 22 }, { "epoch": 4.255398371778198e-05, "grad_norm": 22.75, "learning_rate": 4.6e-05, "loss": 4.1534, "step": 24 }, { "epoch": 4.610014902759714e-05, "grad_norm": 11.5, "learning_rate": 5e-05, "loss": 3.4799, "step": 26 }, { "epoch": 4.964631433741231e-05, "grad_norm": 10.75, "learning_rate": 5.4e-05, "loss": 2.9566, "step": 28 }, { "epoch": 5.3192479647227476e-05, "grad_norm": 9.6875, "learning_rate": 5.800000000000001e-05, "loss": 3.3893, "step": 30 }, { "epoch": 5.673864495704264e-05, "grad_norm": 14.0, "learning_rate": 6.2e-05, "loss": 3.7127, "step": 32 }, { "epoch": 6.028481026685781e-05, "grad_norm": 7.21875, "learning_rate": 6.6e-05, "loss": 2.708, "step": 34 }, { "epoch": 6.383097557667297e-05, "grad_norm": 7.75, "learning_rate": 7.000000000000001e-05, "loss": 3.3579, "step": 36 }, { "epoch": 6.737714088648813e-05, "grad_norm": 5.8125, "learning_rate": 7.4e-05, "loss": 2.5665, "step": 38 }, { "epoch": 7.09233061963033e-05, "grad_norm": 7.59375, "learning_rate": 7.8e-05, "loss": 2.7873, "step": 40 }, { "epoch": 7.446947150611846e-05, "grad_norm": 6.78125, "learning_rate": 8.2e-05, "loss": 2.4843, "step": 42 }, { "epoch": 7.801563681593364e-05, "grad_norm": 6.28125, "learning_rate": 8.599999999999999e-05, "loss": 3.0413, "step": 44 }, { "epoch": 8.15618021257488e-05, "grad_norm": 4.46875, "learning_rate": 8.999999999999999e-05, "loss": 2.2008, "step": 46 }, { "epoch": 8.510796743556396e-05, "grad_norm": 4.5, "learning_rate": 9.400000000000001e-05, "loss": 2.182, "step": 48 }, { "epoch": 8.865413274537912e-05, "grad_norm": 3.421875, "learning_rate": 9.800000000000001e-05, "loss": 2.0904, "step": 50 }, { "epoch": 9.220029805519429e-05, "grad_norm": 4.84375, "learning_rate": 0.000102, "loss": 2.1894, "step": 52 }, { "epoch": 9.574646336500945e-05, "grad_norm": 4.8125, "learning_rate": 0.000106, "loss": 2.4153, "step": 54 }, { "epoch": 9.929262867482463e-05, "grad_norm": 3.75, "learning_rate": 0.00011, "loss": 2.2583, "step": 56 }, { "epoch": 0.00010283879398463979, "grad_norm": 3.0625, "learning_rate": 0.000114, "loss": 2.08, "step": 58 }, { "epoch": 0.00010638495929445495, "grad_norm": 3.125, "learning_rate": 0.000118, "loss": 2.6455, "step": 60 }, { "epoch": 0.00010993112460427011, "grad_norm": 5.59375, "learning_rate": 0.000122, "loss": 2.1495, "step": 62 }, { "epoch": 0.00011347728991408528, "grad_norm": 4.46875, "learning_rate": 0.000126, "loss": 1.957, "step": 64 }, { "epoch": 0.00011702345522390044, "grad_norm": 4.53125, "learning_rate": 0.00013000000000000002, "loss": 2.142, "step": 66 }, { "epoch": 0.00012056962053371562, "grad_norm": 4.40625, "learning_rate": 0.000134, "loss": 2.088, "step": 68 }, { "epoch": 0.00012411578584353076, "grad_norm": 5.34375, "learning_rate": 0.00013800000000000002, "loss": 2.3496, "step": 70 }, { "epoch": 0.00012766195115334594, "grad_norm": 5.46875, "learning_rate": 0.00014199999999999998, "loss": 2.3067, "step": 72 }, { "epoch": 0.00013120811646316112, "grad_norm": 3.265625, "learning_rate": 0.000146, "loss": 1.7205, "step": 74 }, { "epoch": 0.00013475428177297627, "grad_norm": 3.53125, "learning_rate": 0.00015, "loss": 1.9972, "step": 76 }, { "epoch": 0.00013830044708279144, "grad_norm": 3.53125, "learning_rate": 0.000154, "loss": 1.8041, "step": 78 }, { "epoch": 0.0001418466123926066, "grad_norm": 2.65625, "learning_rate": 0.000158, "loss": 2.2117, "step": 80 }, { "epoch": 0.00014539277770242177, "grad_norm": 4.4375, "learning_rate": 0.000162, "loss": 2.1783, "step": 82 }, { "epoch": 0.00014893894301223692, "grad_norm": 2.140625, "learning_rate": 0.00016600000000000002, "loss": 1.9516, "step": 84 }, { "epoch": 0.0001524851083220521, "grad_norm": 4.3125, "learning_rate": 0.00017, "loss": 2.0596, "step": 86 }, { "epoch": 0.00015603127363186727, "grad_norm": 2.484375, "learning_rate": 0.000174, "loss": 1.9511, "step": 88 }, { "epoch": 0.00015957743894168242, "grad_norm": 2.171875, "learning_rate": 0.000178, "loss": 1.9226, "step": 90 }, { "epoch": 0.0001631236042514976, "grad_norm": 6.15625, "learning_rate": 0.000182, "loss": 2.1279, "step": 92 }, { "epoch": 0.00016666976956131275, "grad_norm": 2.109375, "learning_rate": 0.000186, "loss": 1.6153, "step": 94 }, { "epoch": 0.00017021593487112792, "grad_norm": 3.265625, "learning_rate": 0.00019, "loss": 2.2568, "step": 96 }, { "epoch": 0.0001737621001809431, "grad_norm": 3.0625, "learning_rate": 0.000194, "loss": 1.8394, "step": 98 }, { "epoch": 0.00017730826549075825, "grad_norm": 4.0, "learning_rate": 0.00019800000000000002, "loss": 1.9269, "step": 100 }, { "epoch": 0.00018085443080057342, "grad_norm": 2.328125, "learning_rate": 0.000202, "loss": 1.8469, "step": 102 }, { "epoch": 0.00018440059611038857, "grad_norm": 3.6875, "learning_rate": 0.000206, "loss": 2.1011, "step": 104 }, { "epoch": 0.00018794676142020375, "grad_norm": 8.375, "learning_rate": 0.00021, "loss": 1.9144, "step": 106 }, { "epoch": 0.0001914929267300189, "grad_norm": 2.734375, "learning_rate": 0.000214, "loss": 1.855, "step": 108 }, { "epoch": 0.00019503909203983407, "grad_norm": 1.453125, "learning_rate": 0.000218, "loss": 1.5246, "step": 110 }, { "epoch": 0.00019858525734964925, "grad_norm": 2.71875, "learning_rate": 0.000222, "loss": 1.6882, "step": 112 }, { "epoch": 0.0002021314226594644, "grad_norm": 2.125, "learning_rate": 0.00022600000000000002, "loss": 1.9064, "step": 114 }, { "epoch": 0.00020567758796927958, "grad_norm": 3.53125, "learning_rate": 0.00023, "loss": 1.932, "step": 116 }, { "epoch": 0.00020922375327909473, "grad_norm": 5.28125, "learning_rate": 0.00023400000000000002, "loss": 1.8807, "step": 118 }, { "epoch": 0.0002127699185889099, "grad_norm": 3.015625, "learning_rate": 0.00023799999999999998, "loss": 1.8285, "step": 120 }, { "epoch": 0.00021631608389872508, "grad_norm": 3.0, "learning_rate": 0.000242, "loss": 1.7112, "step": 122 }, { "epoch": 0.00021986224920854023, "grad_norm": 3.390625, "learning_rate": 0.000246, "loss": 1.8058, "step": 124 }, { "epoch": 0.0002234084145183554, "grad_norm": 1.875, "learning_rate": 0.00025, "loss": 1.7402, "step": 126 }, { "epoch": 0.00022695457982817055, "grad_norm": 1.953125, "learning_rate": 0.000254, "loss": 1.7423, "step": 128 }, { "epoch": 0.00023050074513798573, "grad_norm": 1.671875, "learning_rate": 0.00025800000000000004, "loss": 1.9259, "step": 130 }, { "epoch": 0.00023404691044780088, "grad_norm": 1.6796875, "learning_rate": 0.000262, "loss": 1.9277, "step": 132 }, { "epoch": 0.00023759307575761606, "grad_norm": 2.125, "learning_rate": 0.000266, "loss": 2.1548, "step": 134 }, { "epoch": 0.00024113924106743123, "grad_norm": 3.796875, "learning_rate": 0.00027, "loss": 2.0847, "step": 136 }, { "epoch": 0.0002446854063772464, "grad_norm": 3.546875, "learning_rate": 0.00027400000000000005, "loss": 1.9213, "step": 138 }, { "epoch": 0.00024823157168706153, "grad_norm": 1.6796875, "learning_rate": 0.00027800000000000004, "loss": 1.9205, "step": 140 }, { "epoch": 0.00025177773699687673, "grad_norm": 1.765625, "learning_rate": 0.00028199999999999997, "loss": 1.9867, "step": 142 }, { "epoch": 0.0002553239023066919, "grad_norm": 0.8203125, "learning_rate": 0.00028599999999999996, "loss": 1.5982, "step": 144 }, { "epoch": 0.00025887006761650703, "grad_norm": 2.5625, "learning_rate": 0.00029, "loss": 1.7819, "step": 146 }, { "epoch": 0.00026241623292632224, "grad_norm": 1.5234375, "learning_rate": 0.000294, "loss": 1.8248, "step": 148 }, { "epoch": 0.0002659623982361374, "grad_norm": 2.671875, "learning_rate": 0.000298, "loss": 1.8077, "step": 150 }, { "epoch": 0.00026950856354595253, "grad_norm": 1.140625, "learning_rate": 0.000302, "loss": 1.6061, "step": 152 }, { "epoch": 0.0002730547288557677, "grad_norm": 1.5078125, "learning_rate": 0.000306, "loss": 1.865, "step": 154 }, { "epoch": 0.0002766008941655829, "grad_norm": 1.4765625, "learning_rate": 0.00031, "loss": 1.7381, "step": 156 }, { "epoch": 0.00028014705947539804, "grad_norm": 1.109375, "learning_rate": 0.000314, "loss": 1.5453, "step": 158 }, { "epoch": 0.0002836932247852132, "grad_norm": 2.171875, "learning_rate": 0.00031800000000000003, "loss": 1.7776, "step": 160 }, { "epoch": 0.0002872393900950284, "grad_norm": 1.7109375, "learning_rate": 0.000322, "loss": 1.5954, "step": 162 }, { "epoch": 0.00029078555540484354, "grad_norm": 3.59375, "learning_rate": 0.000326, "loss": 1.7511, "step": 164 }, { "epoch": 0.0002943317207146587, "grad_norm": 1.4453125, "learning_rate": 0.00033, "loss": 1.5288, "step": 166 }, { "epoch": 0.00029787788602447384, "grad_norm": 1.40625, "learning_rate": 0.00033400000000000004, "loss": 1.5858, "step": 168 }, { "epoch": 0.00030142405133428904, "grad_norm": 2.015625, "learning_rate": 0.00033800000000000003, "loss": 1.4616, "step": 170 }, { "epoch": 0.0003049702166441042, "grad_norm": 3.53125, "learning_rate": 0.000342, "loss": 1.8694, "step": 172 }, { "epoch": 0.00030851638195391934, "grad_norm": 2.75, "learning_rate": 0.000346, "loss": 1.6349, "step": 174 }, { "epoch": 0.00031206254726373454, "grad_norm": 1.328125, "learning_rate": 0.00035, "loss": 1.5103, "step": 176 }, { "epoch": 0.0003156087125735497, "grad_norm": 1.78125, "learning_rate": 0.000354, "loss": 1.5582, "step": 178 }, { "epoch": 0.00031915487788336484, "grad_norm": 1.015625, "learning_rate": 0.000358, "loss": 1.7065, "step": 180 }, { "epoch": 0.00032270104319318, "grad_norm": 0.890625, "learning_rate": 0.000362, "loss": 1.5301, "step": 182 }, { "epoch": 0.0003262472085029952, "grad_norm": 1.828125, "learning_rate": 0.000366, "loss": 1.5613, "step": 184 }, { "epoch": 0.00032979337381281034, "grad_norm": 3.828125, "learning_rate": 0.00037, "loss": 1.9838, "step": 186 }, { "epoch": 0.0003333395391226255, "grad_norm": 4.875, "learning_rate": 0.000374, "loss": 2.3287, "step": 188 }, { "epoch": 0.0003368857044324407, "grad_norm": 0.9375, "learning_rate": 0.000378, "loss": 1.7776, "step": 190 }, { "epoch": 0.00034043186974225584, "grad_norm": 1.6484375, "learning_rate": 0.000382, "loss": 1.6625, "step": 192 }, { "epoch": 0.000343978035052071, "grad_norm": 0.64453125, "learning_rate": 0.000386, "loss": 1.398, "step": 194 }, { "epoch": 0.0003475242003618862, "grad_norm": 1.8125, "learning_rate": 0.00039000000000000005, "loss": 1.7848, "step": 196 }, { "epoch": 0.00035107036567170135, "grad_norm": 1.390625, "learning_rate": 0.00039400000000000004, "loss": 1.8591, "step": 198 }, { "epoch": 0.0003546165309815165, "grad_norm": 0.8203125, "learning_rate": 0.000398, "loss": 1.3619, "step": 200 }, { "epoch": 0.00035816269629133164, "grad_norm": 1.3203125, "learning_rate": 0.000402, "loss": 1.9058, "step": 202 }, { "epoch": 0.00036170886160114685, "grad_norm": 0.74609375, "learning_rate": 0.00040600000000000006, "loss": 1.4741, "step": 204 }, { "epoch": 0.000365255026910962, "grad_norm": 0.71484375, "learning_rate": 0.00041, "loss": 1.5142, "step": 206 }, { "epoch": 0.00036880119222077715, "grad_norm": 2.171875, "learning_rate": 0.000414, "loss": 1.861, "step": 208 }, { "epoch": 0.00037234735753059235, "grad_norm": 0.82421875, "learning_rate": 0.00041799999999999997, "loss": 1.3935, "step": 210 }, { "epoch": 0.0003758935228404075, "grad_norm": 0.94140625, "learning_rate": 0.000422, "loss": 1.3792, "step": 212 }, { "epoch": 0.00037943968815022265, "grad_norm": 1.5390625, "learning_rate": 0.000426, "loss": 1.7609, "step": 214 }, { "epoch": 0.0003829858534600378, "grad_norm": 0.83203125, "learning_rate": 0.00043, "loss": 1.3885, "step": 216 }, { "epoch": 0.000386532018769853, "grad_norm": 1.5, "learning_rate": 0.00043400000000000003, "loss": 1.4444, "step": 218 }, { "epoch": 0.00039007818407966815, "grad_norm": 1.59375, "learning_rate": 0.000438, "loss": 1.3774, "step": 220 }, { "epoch": 0.0003936243493894833, "grad_norm": 0.85546875, "learning_rate": 0.000442, "loss": 1.698, "step": 222 }, { "epoch": 0.0003971705146992985, "grad_norm": 0.96875, "learning_rate": 0.000446, "loss": 1.4195, "step": 224 }, { "epoch": 0.00040071668000911365, "grad_norm": 0.765625, "learning_rate": 0.00045000000000000004, "loss": 1.6253, "step": 226 }, { "epoch": 0.0004042628453189288, "grad_norm": 1.40625, "learning_rate": 0.00045400000000000003, "loss": 1.5447, "step": 228 }, { "epoch": 0.00040780901062874395, "grad_norm": 0.78125, "learning_rate": 0.000458, "loss": 1.5232, "step": 230 }, { "epoch": 0.00041135517593855915, "grad_norm": 2.140625, "learning_rate": 0.000462, "loss": 1.9986, "step": 232 }, { "epoch": 0.0004149013412483743, "grad_norm": 1.5546875, "learning_rate": 0.00046600000000000005, "loss": 1.3245, "step": 234 }, { "epoch": 0.00041844750655818945, "grad_norm": 0.8671875, "learning_rate": 0.00047, "loss": 2.1678, "step": 236 }, { "epoch": 0.00042199367186800466, "grad_norm": 1.734375, "learning_rate": 0.000474, "loss": 1.6768, "step": 238 }, { "epoch": 0.0004255398371778198, "grad_norm": 0.62890625, "learning_rate": 0.00047799999999999996, "loss": 1.5694, "step": 240 }, { "epoch": 0.00042908600248763495, "grad_norm": 3.15625, "learning_rate": 0.000482, "loss": 1.3832, "step": 242 }, { "epoch": 0.00043263216779745016, "grad_norm": 2.390625, "learning_rate": 0.000486, "loss": 1.4785, "step": 244 }, { "epoch": 0.0004361783331072653, "grad_norm": 1.8671875, "learning_rate": 0.00049, "loss": 1.7814, "step": 246 }, { "epoch": 0.00043972449841708046, "grad_norm": 1.3828125, "learning_rate": 0.000494, "loss": 1.3818, "step": 248 }, { "epoch": 0.0004432706637268956, "grad_norm": 0.890625, "learning_rate": 0.000498, "loss": 1.4585, "step": 250 }, { "epoch": 0.0004468168290367108, "grad_norm": 0.62890625, "learning_rate": 0.0005020000000000001, "loss": 1.4761, "step": 252 }, { "epoch": 0.00045036299434652596, "grad_norm": 1.8203125, "learning_rate": 0.000506, "loss": 2.0438, "step": 254 }, { "epoch": 0.0004539091596563411, "grad_norm": 0.671875, "learning_rate": 0.00051, "loss": 1.7384, "step": 256 }, { "epoch": 0.0004574553249661563, "grad_norm": 2.5625, "learning_rate": 0.000514, "loss": 1.5145, "step": 258 }, { "epoch": 0.00046100149027597146, "grad_norm": 1.5703125, "learning_rate": 0.000518, "loss": 1.5793, "step": 260 }, { "epoch": 0.0004645476555857866, "grad_norm": 0.71484375, "learning_rate": 0.000522, "loss": 1.5587, "step": 262 }, { "epoch": 0.00046809382089560176, "grad_norm": 0.62109375, "learning_rate": 0.000526, "loss": 1.5064, "step": 264 }, { "epoch": 0.00047163998620541696, "grad_norm": 0.6171875, "learning_rate": 0.0005300000000000001, "loss": 1.571, "step": 266 }, { "epoch": 0.0004751861515152321, "grad_norm": 1.859375, "learning_rate": 0.0005340000000000001, "loss": 1.5269, "step": 268 }, { "epoch": 0.00047873231682504726, "grad_norm": 0.89453125, "learning_rate": 0.0005380000000000001, "loss": 1.7835, "step": 270 }, { "epoch": 0.00048227848213486246, "grad_norm": 1.0, "learning_rate": 0.0005420000000000001, "loss": 1.8869, "step": 272 }, { "epoch": 0.0004858246474446776, "grad_norm": 0.55078125, "learning_rate": 0.000546, "loss": 1.5411, "step": 274 }, { "epoch": 0.0004893708127544928, "grad_norm": 0.8203125, "learning_rate": 0.00055, "loss": 1.7809, "step": 276 }, { "epoch": 0.000492916978064308, "grad_norm": 1.3671875, "learning_rate": 0.000554, "loss": 1.6076, "step": 278 }, { "epoch": 0.0004964631433741231, "grad_norm": 0.64453125, "learning_rate": 0.000558, "loss": 1.5887, "step": 280 }, { "epoch": 0.0005000093086839383, "grad_norm": 6.28125, "learning_rate": 0.0005620000000000001, "loss": 1.6691, "step": 282 }, { "epoch": 0.0005035554739937535, "grad_norm": 3.890625, "learning_rate": 0.000566, "loss": 1.4103, "step": 284 }, { "epoch": 0.0005071016393035686, "grad_norm": 0.83203125, "learning_rate": 0.00057, "loss": 1.4613, "step": 286 }, { "epoch": 0.0005106478046133838, "grad_norm": 1.7265625, "learning_rate": 0.000574, "loss": 1.5102, "step": 288 }, { "epoch": 0.000514193969923199, "grad_norm": 1.3203125, "learning_rate": 0.000578, "loss": 1.9516, "step": 290 }, { "epoch": 0.0005177401352330141, "grad_norm": 1.7734375, "learning_rate": 0.0005819999999999999, "loss": 2.0735, "step": 292 }, { "epoch": 0.0005212863005428293, "grad_norm": 0.396484375, "learning_rate": 0.0005859999999999999, "loss": 1.4782, "step": 294 }, { "epoch": 0.0005248324658526445, "grad_norm": 0.72265625, "learning_rate": 0.00059, "loss": 1.5791, "step": 296 }, { "epoch": 0.0005283786311624596, "grad_norm": 1.4140625, "learning_rate": 0.000594, "loss": 1.662, "step": 298 }, { "epoch": 0.0005319247964722748, "grad_norm": 1.15625, "learning_rate": 0.000598, "loss": 1.7771, "step": 300 }, { "epoch": 0.0005354709617820899, "grad_norm": 1.296875, "learning_rate": 0.000602, "loss": 2.0136, "step": 302 }, { "epoch": 0.0005390171270919051, "grad_norm": 1.984375, "learning_rate": 0.000606, "loss": 1.6989, "step": 304 }, { "epoch": 0.0005425632924017203, "grad_norm": 0.84375, "learning_rate": 0.00061, "loss": 1.3634, "step": 306 }, { "epoch": 0.0005461094577115354, "grad_norm": 0.99609375, "learning_rate": 0.000614, "loss": 2.0646, "step": 308 }, { "epoch": 0.0005496556230213506, "grad_norm": 0.56640625, "learning_rate": 0.0006180000000000001, "loss": 1.6761, "step": 310 }, { "epoch": 0.0005532017883311658, "grad_norm": 1.234375, "learning_rate": 0.000622, "loss": 1.5955, "step": 312 }, { "epoch": 0.0005567479536409809, "grad_norm": 1.8203125, "learning_rate": 0.000626, "loss": 1.7073, "step": 314 }, { "epoch": 0.0005602941189507961, "grad_norm": 0.88671875, "learning_rate": 0.00063, "loss": 1.517, "step": 316 }, { "epoch": 0.0005638402842606113, "grad_norm": 0.439453125, "learning_rate": 0.000634, "loss": 1.2879, "step": 318 }, { "epoch": 0.0005673864495704264, "grad_norm": 1.7109375, "learning_rate": 0.000638, "loss": 1.5404, "step": 320 }, { "epoch": 0.0005709326148802416, "grad_norm": 0.79296875, "learning_rate": 0.000642, "loss": 1.289, "step": 322 }, { "epoch": 0.0005744787801900568, "grad_norm": 0.7734375, "learning_rate": 0.000646, "loss": 1.538, "step": 324 }, { "epoch": 0.0005780249454998719, "grad_norm": 0.66796875, "learning_rate": 0.0006500000000000001, "loss": 1.5195, "step": 326 }, { "epoch": 0.0005815711108096871, "grad_norm": 1.421875, "learning_rate": 0.0006540000000000001, "loss": 1.5568, "step": 328 }, { "epoch": 0.0005851172761195023, "grad_norm": 3.703125, "learning_rate": 0.0006580000000000001, "loss": 1.5764, "step": 330 }, { "epoch": 0.0005886634414293174, "grad_norm": 1.109375, "learning_rate": 0.000662, "loss": 1.6677, "step": 332 }, { "epoch": 0.0005922096067391326, "grad_norm": 0.67578125, "learning_rate": 0.000666, "loss": 1.7501, "step": 334 }, { "epoch": 0.0005957557720489477, "grad_norm": 0.90234375, "learning_rate": 0.00067, "loss": 1.2844, "step": 336 }, { "epoch": 0.0005993019373587629, "grad_norm": 3.078125, "learning_rate": 0.000674, "loss": 1.7816, "step": 338 }, { "epoch": 0.0006028481026685781, "grad_norm": 1.1328125, "learning_rate": 0.0006780000000000001, "loss": 1.436, "step": 340 }, { "epoch": 0.0006063942679783932, "grad_norm": 0.73046875, "learning_rate": 0.0006820000000000001, "loss": 1.4653, "step": 342 }, { "epoch": 0.0006099404332882084, "grad_norm": 1.1328125, "learning_rate": 0.0006860000000000001, "loss": 1.8547, "step": 344 }, { "epoch": 0.0006134865985980236, "grad_norm": 0.7265625, "learning_rate": 0.00069, "loss": 1.4515, "step": 346 }, { "epoch": 0.0006170327639078387, "grad_norm": 0.9921875, "learning_rate": 0.000694, "loss": 1.4289, "step": 348 }, { "epoch": 0.0006205789292176539, "grad_norm": 1.4296875, "learning_rate": 0.0006979999999999999, "loss": 1.6906, "step": 350 }, { "epoch": 0.0006241250945274691, "grad_norm": 2.40625, "learning_rate": 0.0007019999999999999, "loss": 1.4184, "step": 352 }, { "epoch": 0.0006276712598372842, "grad_norm": 0.94140625, "learning_rate": 0.0007059999999999999, "loss": 1.2701, "step": 354 }, { "epoch": 0.0006312174251470994, "grad_norm": 4.46875, "learning_rate": 0.00071, "loss": 1.3772, "step": 356 }, { "epoch": 0.0006347635904569146, "grad_norm": 3.5625, "learning_rate": 0.000714, "loss": 1.3526, "step": 358 }, { "epoch": 0.0006383097557667297, "grad_norm": 1.0234375, "learning_rate": 0.000718, "loss": 1.3579, "step": 360 }, { "epoch": 0.0006418559210765449, "grad_norm": 1.7265625, "learning_rate": 0.000722, "loss": 1.4318, "step": 362 }, { "epoch": 0.00064540208638636, "grad_norm": 2.015625, "learning_rate": 0.000726, "loss": 1.355, "step": 364 }, { "epoch": 0.0006489482516961752, "grad_norm": 1.3359375, "learning_rate": 0.00073, "loss": 1.3997, "step": 366 }, { "epoch": 0.0006524944170059904, "grad_norm": 1.4140625, "learning_rate": 0.000734, "loss": 1.0671, "step": 368 }, { "epoch": 0.0006560405823158055, "grad_norm": 1.484375, "learning_rate": 0.000738, "loss": 1.1156, "step": 370 }, { "epoch": 0.0006595867476256207, "grad_norm": 2.421875, "learning_rate": 0.000742, "loss": 1.2581, "step": 372 }, { "epoch": 0.0006631329129354359, "grad_norm": 1.0625, "learning_rate": 0.000746, "loss": 0.8968, "step": 374 }, { "epoch": 0.000666679078245251, "grad_norm": 3.765625, "learning_rate": 0.00075, "loss": 1.1869, "step": 376 }, { "epoch": 0.0006702252435550662, "grad_norm": 1.7734375, "learning_rate": 0.000754, "loss": 0.9751, "step": 378 }, { "epoch": 0.0006737714088648814, "grad_norm": 2.890625, "learning_rate": 0.000758, "loss": 1.2709, "step": 380 }, { "epoch": 0.0006773175741746965, "grad_norm": 1.6171875, "learning_rate": 0.000762, "loss": 1.1142, "step": 382 }, { "epoch": 0.0006808637394845117, "grad_norm": 3.796875, "learning_rate": 0.0007660000000000001, "loss": 0.9331, "step": 384 }, { "epoch": 0.0006844099047943269, "grad_norm": 2.859375, "learning_rate": 0.0007700000000000001, "loss": 0.9445, "step": 386 }, { "epoch": 0.000687956070104142, "grad_norm": 3.40625, "learning_rate": 0.0007740000000000001, "loss": 1.1067, "step": 388 }, { "epoch": 0.0006915022354139572, "grad_norm": 1.234375, "learning_rate": 0.000778, "loss": 0.8197, "step": 390 }, { "epoch": 0.0006950484007237724, "grad_norm": 2.671875, "learning_rate": 0.000782, "loss": 0.9289, "step": 392 }, { "epoch": 0.0006985945660335875, "grad_norm": 5.53125, "learning_rate": 0.000786, "loss": 1.2645, "step": 394 }, { "epoch": 0.0007021407313434027, "grad_norm": 2.96875, "learning_rate": 0.00079, "loss": 0.7802, "step": 396 }, { "epoch": 0.0007056868966532178, "grad_norm": 4.96875, "learning_rate": 0.0007940000000000001, "loss": 0.8851, "step": 398 }, { "epoch": 0.000709233061963033, "grad_norm": 1.5234375, "learning_rate": 0.0007980000000000001, "loss": 0.8057, "step": 400 }, { "epoch": 0.0007127792272728482, "grad_norm": 0.91796875, "learning_rate": 0.0008020000000000001, "loss": 0.8375, "step": 402 }, { "epoch": 0.0007163253925826633, "grad_norm": 0.98828125, "learning_rate": 0.0008060000000000001, "loss": 0.7738, "step": 404 }, { "epoch": 0.0007198715578924785, "grad_norm": 1.921875, "learning_rate": 0.0008100000000000001, "loss": 0.9653, "step": 406 }, { "epoch": 0.0007234177232022937, "grad_norm": 2.0625, "learning_rate": 0.0008139999999999999, "loss": 0.8432, "step": 408 }, { "epoch": 0.0007269638885121088, "grad_norm": 0.8359375, "learning_rate": 0.0008179999999999999, "loss": 0.8312, "step": 410 }, { "epoch": 0.000730510053821924, "grad_norm": 5.0625, "learning_rate": 0.0008219999999999999, "loss": 0.8912, "step": 412 }, { "epoch": 0.0007340562191317392, "grad_norm": 4.0625, "learning_rate": 0.000826, "loss": 0.8716, "step": 414 }, { "epoch": 0.0007376023844415543, "grad_norm": 6.6875, "learning_rate": 0.00083, "loss": 0.7662, "step": 416 }, { "epoch": 0.0007411485497513695, "grad_norm": 0.95703125, "learning_rate": 0.000834, "loss": 0.9265, "step": 418 }, { "epoch": 0.0007446947150611847, "grad_norm": 1.0078125, "learning_rate": 0.000838, "loss": 0.9106, "step": 420 }, { "epoch": 0.0007482408803709998, "grad_norm": 3.0625, "learning_rate": 0.000842, "loss": 0.8929, "step": 422 }, { "epoch": 0.000751787045680815, "grad_norm": 2.34375, "learning_rate": 0.000846, "loss": 0.8809, "step": 424 }, { "epoch": 0.0007553332109906301, "grad_norm": 0.76953125, "learning_rate": 0.00085, "loss": 0.6671, "step": 426 }, { "epoch": 0.0007588793763004453, "grad_norm": 0.7734375, "learning_rate": 0.000854, "loss": 0.7229, "step": 428 }, { "epoch": 0.0007624255416102605, "grad_norm": 2.484375, "learning_rate": 0.000858, "loss": 0.9, "step": 430 }, { "epoch": 0.0007659717069200756, "grad_norm": 2.203125, "learning_rate": 0.000862, "loss": 0.8212, "step": 432 }, { "epoch": 0.0007695178722298908, "grad_norm": 0.66015625, "learning_rate": 0.000866, "loss": 0.5973, "step": 434 }, { "epoch": 0.000773064037539706, "grad_norm": 2.328125, "learning_rate": 0.00087, "loss": 0.5344, "step": 436 }, { "epoch": 0.0007766102028495211, "grad_norm": 1.0, "learning_rate": 0.000874, "loss": 0.5902, "step": 438 }, { "epoch": 0.0007801563681593363, "grad_norm": 0.80078125, "learning_rate": 0.000878, "loss": 0.6295, "step": 440 }, { "epoch": 0.0007837025334691515, "grad_norm": 0.75, "learning_rate": 0.000882, "loss": 0.6326, "step": 442 }, { "epoch": 0.0007872486987789666, "grad_norm": 0.9453125, "learning_rate": 0.0008860000000000001, "loss": 0.5642, "step": 444 }, { "epoch": 0.0007907948640887818, "grad_norm": 0.62890625, "learning_rate": 0.0008900000000000001, "loss": 0.602, "step": 446 }, { "epoch": 0.000794341029398597, "grad_norm": 3.234375, "learning_rate": 0.000894, "loss": 0.7297, "step": 448 }, { "epoch": 0.0007978871947084121, "grad_norm": 0.7890625, "learning_rate": 0.000898, "loss": 0.701, "step": 450 }, { "epoch": 0.0008014333600182273, "grad_norm": 0.69140625, "learning_rate": 0.000902, "loss": 0.5983, "step": 452 }, { "epoch": 0.0008049795253280425, "grad_norm": 1.421875, "learning_rate": 0.000906, "loss": 0.4609, "step": 454 }, { "epoch": 0.0008085256906378576, "grad_norm": 1.1484375, "learning_rate": 0.00091, "loss": 0.5314, "step": 456 }, { "epoch": 0.0008120718559476728, "grad_norm": 4.4375, "learning_rate": 0.0009140000000000001, "loss": 1.0837, "step": 458 }, { "epoch": 0.0008156180212574879, "grad_norm": 2.953125, "learning_rate": 0.0009180000000000001, "loss": 0.7036, "step": 460 }, { "epoch": 0.0008191641865673031, "grad_norm": 0.75, "learning_rate": 0.0009220000000000001, "loss": 0.699, "step": 462 }, { "epoch": 0.0008227103518771183, "grad_norm": 0.6171875, "learning_rate": 0.0009260000000000001, "loss": 0.6727, "step": 464 }, { "epoch": 0.0008262565171869334, "grad_norm": 2.0, "learning_rate": 0.00093, "loss": 0.6781, "step": 466 }, { "epoch": 0.0008298026824967486, "grad_norm": 0.70703125, "learning_rate": 0.000934, "loss": 0.603, "step": 468 }, { "epoch": 0.0008333488478065638, "grad_norm": 0.62890625, "learning_rate": 0.0009379999999999999, "loss": 0.5876, "step": 470 }, { "epoch": 0.0008368950131163789, "grad_norm": 0.5703125, "learning_rate": 0.000942, "loss": 0.5201, "step": 472 }, { "epoch": 0.0008404411784261941, "grad_norm": 3.390625, "learning_rate": 0.000946, "loss": 0.6795, "step": 474 }, { "epoch": 0.0008439873437360093, "grad_norm": 1.125, "learning_rate": 0.00095, "loss": 0.5322, "step": 476 }, { "epoch": 0.0008475335090458244, "grad_norm": 2.15625, "learning_rate": 0.000954, "loss": 0.636, "step": 478 }, { "epoch": 0.0008510796743556396, "grad_norm": 0.66015625, "learning_rate": 0.000958, "loss": 0.5606, "step": 480 }, { "epoch": 0.0008546258396654548, "grad_norm": 0.66796875, "learning_rate": 0.000962, "loss": 0.4997, "step": 482 }, { "epoch": 0.0008581720049752699, "grad_norm": 1.1953125, "learning_rate": 0.000966, "loss": 0.704, "step": 484 }, { "epoch": 0.0008617181702850851, "grad_norm": 0.8828125, "learning_rate": 0.0009699999999999999, "loss": 0.5248, "step": 486 }, { "epoch": 0.0008652643355949003, "grad_norm": 5.75, "learning_rate": 0.000974, "loss": 0.7647, "step": 488 }, { "epoch": 0.0008688105009047154, "grad_norm": 0.59765625, "learning_rate": 0.000978, "loss": 0.54, "step": 490 }, { "epoch": 0.0008723566662145306, "grad_norm": 3.109375, "learning_rate": 0.000982, "loss": 0.6266, "step": 492 }, { "epoch": 0.0008759028315243457, "grad_norm": 2.875, "learning_rate": 0.0009860000000000001, "loss": 0.6661, "step": 494 }, { "epoch": 0.0008794489968341609, "grad_norm": 0.77734375, "learning_rate": 0.00099, "loss": 0.5463, "step": 496 }, { "epoch": 0.0008829951621439761, "grad_norm": 1.078125, "learning_rate": 0.000994, "loss": 0.5743, "step": 498 }, { "epoch": 0.0008865413274537912, "grad_norm": 1.28125, "learning_rate": 0.000998, "loss": 0.6301, "step": 500 }, { "epoch": 0.0008900874927636064, "grad_norm": 0.44921875, "learning_rate": 0.001002, "loss": 0.4796, "step": 502 }, { "epoch": 0.0008936336580734216, "grad_norm": 0.6015625, "learning_rate": 0.001006, "loss": 0.5895, "step": 504 }, { "epoch": 0.0008971798233832367, "grad_norm": 1.875, "learning_rate": 0.00101, "loss": 0.5832, "step": 506 }, { "epoch": 0.0009007259886930519, "grad_norm": 1.296875, "learning_rate": 0.001014, "loss": 0.6405, "step": 508 }, { "epoch": 0.0009042721540028671, "grad_norm": 0.455078125, "learning_rate": 0.001018, "loss": 0.4511, "step": 510 }, { "epoch": 0.0009078183193126822, "grad_norm": 0.515625, "learning_rate": 0.0010220000000000001, "loss": 0.4704, "step": 512 }, { "epoch": 0.0009113644846224974, "grad_norm": 1.1953125, "learning_rate": 0.001026, "loss": 0.55, "step": 514 }, { "epoch": 0.0009149106499323126, "grad_norm": 2.359375, "learning_rate": 0.00103, "loss": 0.6907, "step": 516 }, { "epoch": 0.0009184568152421277, "grad_norm": 1.859375, "learning_rate": 0.001034, "loss": 0.7927, "step": 518 }, { "epoch": 0.0009220029805519429, "grad_norm": 2.484375, "learning_rate": 0.001038, "loss": 0.5656, "step": 520 }, { "epoch": 0.000925549145861758, "grad_norm": 0.78515625, "learning_rate": 0.001042, "loss": 0.6883, "step": 522 }, { "epoch": 0.0009290953111715732, "grad_norm": 1.421875, "learning_rate": 0.001046, "loss": 0.7126, "step": 524 }, { "epoch": 0.0009326414764813884, "grad_norm": 1.328125, "learning_rate": 0.0010500000000000002, "loss": 0.5154, "step": 526 }, { "epoch": 0.0009361876417912035, "grad_norm": 0.71484375, "learning_rate": 0.001054, "loss": 0.5787, "step": 528 }, { "epoch": 0.0009397338071010187, "grad_norm": 0.69140625, "learning_rate": 0.0010580000000000001, "loss": 0.5701, "step": 530 }, { "epoch": 0.0009432799724108339, "grad_norm": 0.640625, "learning_rate": 0.001062, "loss": 0.6469, "step": 532 }, { "epoch": 0.000946826137720649, "grad_norm": 0.9375, "learning_rate": 0.001066, "loss": 0.5711, "step": 534 }, { "epoch": 0.0009503723030304642, "grad_norm": 13.6875, "learning_rate": 0.00107, "loss": 0.559, "step": 536 }, { "epoch": 0.0009539184683402794, "grad_norm": 0.60546875, "learning_rate": 0.001074, "loss": 0.4705, "step": 538 }, { "epoch": 0.0009574646336500945, "grad_norm": 1.2890625, "learning_rate": 0.0010780000000000002, "loss": 0.5083, "step": 540 }, { "epoch": 0.0009610107989599097, "grad_norm": 1.0703125, "learning_rate": 0.001082, "loss": 0.6336, "step": 542 }, { "epoch": 0.0009645569642697249, "grad_norm": 3.21875, "learning_rate": 0.0010860000000000002, "loss": 0.6729, "step": 544 }, { "epoch": 0.00096810312957954, "grad_norm": 0.6640625, "learning_rate": 0.00109, "loss": 0.4936, "step": 546 }, { "epoch": 0.0009716492948893552, "grad_norm": 2.421875, "learning_rate": 0.0010940000000000001, "loss": 0.7861, "step": 548 }, { "epoch": 0.0009751954601991704, "grad_norm": 0.9375, "learning_rate": 0.001098, "loss": 0.5245, "step": 550 }, { "epoch": 0.0009787416255089855, "grad_norm": 1.015625, "learning_rate": 0.0011020000000000001, "loss": 0.4803, "step": 552 }, { "epoch": 0.0009822877908188007, "grad_norm": 4.1875, "learning_rate": 0.0011060000000000002, "loss": 0.5755, "step": 554 }, { "epoch": 0.000985833956128616, "grad_norm": 0.453125, "learning_rate": 0.00111, "loss": 0.4838, "step": 556 }, { "epoch": 0.0009893801214384311, "grad_norm": 1.6171875, "learning_rate": 0.0011140000000000002, "loss": 0.6264, "step": 558 }, { "epoch": 0.0009929262867482461, "grad_norm": 2.25, "learning_rate": 0.001118, "loss": 0.8499, "step": 560 }, { "epoch": 0.0009964724520580613, "grad_norm": 0.76171875, "learning_rate": 0.0011220000000000002, "loss": 0.5218, "step": 562 }, { "epoch": 0.0010000186173678765, "grad_norm": 0.7265625, "learning_rate": 0.0011259999999999998, "loss": 0.6252, "step": 564 }, { "epoch": 0.0010035647826776917, "grad_norm": 0.439453125, "learning_rate": 0.00113, "loss": 0.455, "step": 566 }, { "epoch": 0.001007110947987507, "grad_norm": 0.76953125, "learning_rate": 0.001134, "loss": 0.4322, "step": 568 }, { "epoch": 0.001010657113297322, "grad_norm": 5.59375, "learning_rate": 0.001138, "loss": 1.0676, "step": 570 }, { "epoch": 0.0010142032786071371, "grad_norm": 1.40625, "learning_rate": 0.001142, "loss": 0.5648, "step": 572 }, { "epoch": 0.0010177494439169523, "grad_norm": 0.984375, "learning_rate": 0.0011459999999999999, "loss": 0.8953, "step": 574 }, { "epoch": 0.0010212956092267675, "grad_norm": 0.65234375, "learning_rate": 0.00115, "loss": 0.4995, "step": 576 }, { "epoch": 0.0010248417745365827, "grad_norm": 1.3203125, "learning_rate": 0.0011539999999999999, "loss": 0.5413, "step": 578 }, { "epoch": 0.001028387939846398, "grad_norm": 3.859375, "learning_rate": 0.001158, "loss": 0.7195, "step": 580 }, { "epoch": 0.001031934105156213, "grad_norm": 0.97265625, "learning_rate": 0.0011619999999999998, "loss": 0.6171, "step": 582 }, { "epoch": 0.0010354802704660281, "grad_norm": 0.70703125, "learning_rate": 0.001166, "loss": 0.5062, "step": 584 }, { "epoch": 0.0010390264357758433, "grad_norm": 0.734375, "learning_rate": 0.00117, "loss": 0.5645, "step": 586 }, { "epoch": 0.0010425726010856585, "grad_norm": 0.70703125, "learning_rate": 0.001174, "loss": 0.4871, "step": 588 }, { "epoch": 0.0010461187663954737, "grad_norm": 1.703125, "learning_rate": 0.001178, "loss": 0.4328, "step": 590 }, { "epoch": 0.001049664931705289, "grad_norm": 0.73828125, "learning_rate": 0.0011819999999999999, "loss": 0.5342, "step": 592 }, { "epoch": 0.001053211097015104, "grad_norm": 1.3984375, "learning_rate": 0.001186, "loss": 0.4844, "step": 594 }, { "epoch": 0.0010567572623249191, "grad_norm": 0.9609375, "learning_rate": 0.0011899999999999999, "loss": 0.4575, "step": 596 }, { "epoch": 0.0010603034276347343, "grad_norm": 0.482421875, "learning_rate": 0.001194, "loss": 0.5371, "step": 598 }, { "epoch": 0.0010638495929445495, "grad_norm": 1.59375, "learning_rate": 0.001198, "loss": 0.4835, "step": 600 }, { "epoch": 0.0010673957582543647, "grad_norm": 0.55078125, "learning_rate": 0.001202, "loss": 0.4884, "step": 602 }, { "epoch": 0.0010709419235641797, "grad_norm": 0.5625, "learning_rate": 0.001206, "loss": 0.6999, "step": 604 }, { "epoch": 0.001074488088873995, "grad_norm": 1.03125, "learning_rate": 0.00121, "loss": 0.4397, "step": 606 }, { "epoch": 0.0010780342541838101, "grad_norm": 1.3984375, "learning_rate": 0.001214, "loss": 0.6247, "step": 608 }, { "epoch": 0.0010815804194936253, "grad_norm": 0.91015625, "learning_rate": 0.001218, "loss": 0.3905, "step": 610 }, { "epoch": 0.0010851265848034405, "grad_norm": 0.5546875, "learning_rate": 0.001222, "loss": 0.4631, "step": 612 }, { "epoch": 0.0010886727501132557, "grad_norm": 1.421875, "learning_rate": 0.001226, "loss": 0.4027, "step": 614 }, { "epoch": 0.0010922189154230707, "grad_norm": 0.40234375, "learning_rate": 0.00123, "loss": 0.5303, "step": 616 }, { "epoch": 0.001095765080732886, "grad_norm": 0.7109375, "learning_rate": 0.001234, "loss": 0.4526, "step": 618 }, { "epoch": 0.0010993112460427011, "grad_norm": 2.140625, "learning_rate": 0.001238, "loss": 0.6035, "step": 620 }, { "epoch": 0.0011028574113525163, "grad_norm": 1.15625, "learning_rate": 0.001242, "loss": 0.5793, "step": 622 }, { "epoch": 0.0011064035766623315, "grad_norm": 0.416015625, "learning_rate": 0.001246, "loss": 0.4564, "step": 624 }, { "epoch": 0.0011099497419721467, "grad_norm": 0.58203125, "learning_rate": 0.00125, "loss": 0.4803, "step": 626 }, { "epoch": 0.0011134959072819617, "grad_norm": 0.88671875, "learning_rate": 0.0012540000000000001, "loss": 0.6734, "step": 628 }, { "epoch": 0.001117042072591777, "grad_norm": 0.66015625, "learning_rate": 0.001258, "loss": 0.4855, "step": 630 }, { "epoch": 0.0011205882379015921, "grad_norm": 1.15625, "learning_rate": 0.001262, "loss": 0.4557, "step": 632 }, { "epoch": 0.0011241344032114073, "grad_norm": 1.78125, "learning_rate": 0.001266, "loss": 0.5451, "step": 634 }, { "epoch": 0.0011276805685212225, "grad_norm": 1.7265625, "learning_rate": 0.00127, "loss": 0.5126, "step": 636 }, { "epoch": 0.0011312267338310375, "grad_norm": 1.265625, "learning_rate": 0.001274, "loss": 0.5094, "step": 638 }, { "epoch": 0.0011347728991408527, "grad_norm": 2.21875, "learning_rate": 0.001278, "loss": 0.5792, "step": 640 }, { "epoch": 0.001138319064450668, "grad_norm": 1.796875, "learning_rate": 0.0012820000000000002, "loss": 0.5892, "step": 642 }, { "epoch": 0.0011418652297604831, "grad_norm": 0.69921875, "learning_rate": 0.001286, "loss": 0.5642, "step": 644 }, { "epoch": 0.0011454113950702983, "grad_norm": 0.50390625, "learning_rate": 0.0012900000000000001, "loss": 0.4613, "step": 646 }, { "epoch": 0.0011489575603801136, "grad_norm": 0.4140625, "learning_rate": 0.001294, "loss": 0.5105, "step": 648 }, { "epoch": 0.0011525037256899285, "grad_norm": 0.5234375, "learning_rate": 0.0012980000000000001, "loss": 0.7083, "step": 650 }, { "epoch": 0.0011560498909997437, "grad_norm": 0.6640625, "learning_rate": 0.001302, "loss": 0.4454, "step": 652 }, { "epoch": 0.001159596056309559, "grad_norm": 0.52734375, "learning_rate": 0.001306, "loss": 0.3984, "step": 654 }, { "epoch": 0.0011631422216193741, "grad_norm": 1.7578125, "learning_rate": 0.0013100000000000002, "loss": 0.4624, "step": 656 }, { "epoch": 0.0011666883869291894, "grad_norm": 0.39453125, "learning_rate": 0.001314, "loss": 0.5275, "step": 658 }, { "epoch": 0.0011702345522390046, "grad_norm": 0.62890625, "learning_rate": 0.0013180000000000002, "loss": 0.4649, "step": 660 }, { "epoch": 0.0011737807175488195, "grad_norm": 0.451171875, "learning_rate": 0.001322, "loss": 0.4366, "step": 662 }, { "epoch": 0.0011773268828586347, "grad_norm": 0.41015625, "learning_rate": 0.0013260000000000001, "loss": 0.4079, "step": 664 }, { "epoch": 0.00118087304816845, "grad_norm": 0.466796875, "learning_rate": 0.00133, "loss": 0.537, "step": 666 }, { "epoch": 0.0011844192134782652, "grad_norm": 0.8046875, "learning_rate": 0.0013340000000000001, "loss": 0.4374, "step": 668 }, { "epoch": 0.0011879653787880804, "grad_norm": 0.828125, "learning_rate": 0.0013380000000000002, "loss": 0.5113, "step": 670 }, { "epoch": 0.0011915115440978953, "grad_norm": 0.5546875, "learning_rate": 0.001342, "loss": 0.4182, "step": 672 }, { "epoch": 0.0011950577094077105, "grad_norm": 2.015625, "learning_rate": 0.0013460000000000002, "loss": 0.4994, "step": 674 }, { "epoch": 0.0011986038747175258, "grad_norm": 1.5546875, "learning_rate": 0.00135, "loss": 0.7251, "step": 676 }, { "epoch": 0.001202150040027341, "grad_norm": 2.65625, "learning_rate": 0.0013540000000000002, "loss": 0.8491, "step": 678 }, { "epoch": 0.0012056962053371562, "grad_norm": 0.62890625, "learning_rate": 0.001358, "loss": 0.3866, "step": 680 }, { "epoch": 0.0012092423706469714, "grad_norm": 0.82421875, "learning_rate": 0.0013620000000000001, "loss": 0.4451, "step": 682 }, { "epoch": 0.0012127885359567863, "grad_norm": 0.796875, "learning_rate": 0.001366, "loss": 0.3521, "step": 684 }, { "epoch": 0.0012163347012666016, "grad_norm": 0.392578125, "learning_rate": 0.0013700000000000001, "loss": 0.4155, "step": 686 }, { "epoch": 0.0012198808665764168, "grad_norm": 1.28125, "learning_rate": 0.0013740000000000002, "loss": 0.5518, "step": 688 }, { "epoch": 0.001223427031886232, "grad_norm": 0.462890625, "learning_rate": 0.0013779999999999999, "loss": 0.3655, "step": 690 }, { "epoch": 0.0012269731971960472, "grad_norm": 1.109375, "learning_rate": 0.001382, "loss": 0.3804, "step": 692 }, { "epoch": 0.0012305193625058621, "grad_norm": 2.796875, "learning_rate": 0.0013859999999999999, "loss": 0.663, "step": 694 }, { "epoch": 0.0012340655278156774, "grad_norm": 1.203125, "learning_rate": 0.00139, "loss": 0.5644, "step": 696 }, { "epoch": 0.0012376116931254926, "grad_norm": 4.28125, "learning_rate": 0.0013939999999999998, "loss": 0.5431, "step": 698 }, { "epoch": 0.0012411578584353078, "grad_norm": 0.7578125, "learning_rate": 0.001398, "loss": 0.4251, "step": 700 }, { "epoch": 0.001244704023745123, "grad_norm": 0.390625, "learning_rate": 0.001402, "loss": 0.4524, "step": 702 }, { "epoch": 0.0012482501890549382, "grad_norm": 0.36328125, "learning_rate": 0.001406, "loss": 0.3926, "step": 704 }, { "epoch": 0.0012517963543647532, "grad_norm": 1.1484375, "learning_rate": 0.00141, "loss": 0.4461, "step": 706 }, { "epoch": 0.0012553425196745684, "grad_norm": 0.431640625, "learning_rate": 0.001414, "loss": 0.5012, "step": 708 }, { "epoch": 0.0012588886849843836, "grad_norm": 0.46484375, "learning_rate": 0.001418, "loss": 0.4257, "step": 710 }, { "epoch": 0.0012624348502941988, "grad_norm": 1.75, "learning_rate": 0.0014219999999999999, "loss": 0.4114, "step": 712 }, { "epoch": 0.001265981015604014, "grad_norm": 0.66796875, "learning_rate": 0.001426, "loss": 0.4855, "step": 714 }, { "epoch": 0.0012695271809138292, "grad_norm": 2.171875, "learning_rate": 0.00143, "loss": 0.6086, "step": 716 }, { "epoch": 0.0012730733462236442, "grad_norm": 1.375, "learning_rate": 0.001434, "loss": 0.5398, "step": 718 }, { "epoch": 0.0012766195115334594, "grad_norm": 1.2109375, "learning_rate": 0.001438, "loss": 0.5382, "step": 720 }, { "epoch": 0.0012801656768432746, "grad_norm": 1.9765625, "learning_rate": 0.001442, "loss": 0.4693, "step": 722 }, { "epoch": 0.0012837118421530898, "grad_norm": 1.1015625, "learning_rate": 0.001446, "loss": 0.4705, "step": 724 }, { "epoch": 0.001287258007462905, "grad_norm": 0.83984375, "learning_rate": 0.00145, "loss": 0.4714, "step": 726 }, { "epoch": 0.00129080417277272, "grad_norm": 0.66796875, "learning_rate": 0.001454, "loss": 0.9141, "step": 728 }, { "epoch": 0.0012943503380825352, "grad_norm": 0.77734375, "learning_rate": 0.001458, "loss": 0.6113, "step": 730 }, { "epoch": 0.0012978965033923504, "grad_norm": 2.09375, "learning_rate": 0.001462, "loss": 0.4465, "step": 732 }, { "epoch": 0.0013014426687021656, "grad_norm": 1.1875, "learning_rate": 0.001466, "loss": 0.4927, "step": 734 }, { "epoch": 0.0013049888340119808, "grad_norm": 0.3671875, "learning_rate": 0.00147, "loss": 0.3272, "step": 736 }, { "epoch": 0.001308534999321796, "grad_norm": 0.7578125, "learning_rate": 0.001474, "loss": 0.5498, "step": 738 }, { "epoch": 0.001312081164631611, "grad_norm": 2.6875, "learning_rate": 0.001478, "loss": 0.726, "step": 740 }, { "epoch": 0.0013156273299414262, "grad_norm": 0.97265625, "learning_rate": 0.001482, "loss": 0.4328, "step": 742 }, { "epoch": 0.0013191734952512414, "grad_norm": 3.953125, "learning_rate": 0.0014860000000000001, "loss": 0.4116, "step": 744 }, { "epoch": 0.0013227196605610566, "grad_norm": 1.1015625, "learning_rate": 0.00149, "loss": 0.403, "step": 746 }, { "epoch": 0.0013262658258708718, "grad_norm": 0.671875, "learning_rate": 0.001494, "loss": 0.4467, "step": 748 }, { "epoch": 0.001329811991180687, "grad_norm": 0.96875, "learning_rate": 0.001498, "loss": 0.4754, "step": 750 }, { "epoch": 0.001333358156490502, "grad_norm": 0.9453125, "learning_rate": 0.001502, "loss": 0.5166, "step": 752 }, { "epoch": 0.0013369043218003172, "grad_norm": 1.3125, "learning_rate": 0.001506, "loss": 0.4412, "step": 754 }, { "epoch": 0.0013404504871101324, "grad_norm": 0.83984375, "learning_rate": 0.00151, "loss": 0.3362, "step": 756 }, { "epoch": 0.0013439966524199476, "grad_norm": 0.796875, "learning_rate": 0.001514, "loss": 0.3429, "step": 758 }, { "epoch": 0.0013475428177297628, "grad_norm": 0.470703125, "learning_rate": 0.001518, "loss": 0.4573, "step": 760 }, { "epoch": 0.0013510889830395778, "grad_norm": 0.84375, "learning_rate": 0.0015220000000000001, "loss": 0.5393, "step": 762 }, { "epoch": 0.001354635148349393, "grad_norm": 0.9453125, "learning_rate": 0.001526, "loss": 0.3999, "step": 764 }, { "epoch": 0.0013581813136592082, "grad_norm": 0.447265625, "learning_rate": 0.0015300000000000001, "loss": 0.4177, "step": 766 }, { "epoch": 0.0013617274789690234, "grad_norm": 1.1796875, "learning_rate": 0.001534, "loss": 0.7588, "step": 768 }, { "epoch": 0.0013652736442788386, "grad_norm": 0.66015625, "learning_rate": 0.001538, "loss": 0.4386, "step": 770 }, { "epoch": 0.0013688198095886538, "grad_norm": 1.3359375, "learning_rate": 0.001542, "loss": 0.3225, "step": 772 }, { "epoch": 0.0013723659748984688, "grad_norm": 1.671875, "learning_rate": 0.001546, "loss": 0.4429, "step": 774 }, { "epoch": 0.001375912140208284, "grad_norm": 0.625, "learning_rate": 0.0015500000000000002, "loss": 0.4224, "step": 776 }, { "epoch": 0.0013794583055180992, "grad_norm": 1.0703125, "learning_rate": 0.001554, "loss": 0.4154, "step": 778 }, { "epoch": 0.0013830044708279144, "grad_norm": 0.6015625, "learning_rate": 0.0015580000000000001, "loss": 0.3761, "step": 780 }, { "epoch": 0.0013865506361377296, "grad_norm": 0.515625, "learning_rate": 0.001562, "loss": 0.3326, "step": 782 }, { "epoch": 0.0013900968014475448, "grad_norm": 0.54296875, "learning_rate": 0.0015660000000000001, "loss": 0.4234, "step": 784 }, { "epoch": 0.0013936429667573598, "grad_norm": 0.87109375, "learning_rate": 0.00157, "loss": 0.3707, "step": 786 }, { "epoch": 0.001397189132067175, "grad_norm": 2.421875, "learning_rate": 0.001574, "loss": 0.4198, "step": 788 }, { "epoch": 0.0014007352973769902, "grad_norm": 1.6015625, "learning_rate": 0.0015780000000000002, "loss": 0.6297, "step": 790 }, { "epoch": 0.0014042814626868054, "grad_norm": 2.15625, "learning_rate": 0.001582, "loss": 0.437, "step": 792 }, { "epoch": 0.0014078276279966206, "grad_norm": 0.333984375, "learning_rate": 0.0015860000000000002, "loss": 0.3658, "step": 794 }, { "epoch": 0.0014113737933064356, "grad_norm": 0.4921875, "learning_rate": 0.00159, "loss": 0.423, "step": 796 }, { "epoch": 0.0014149199586162508, "grad_norm": 0.322265625, "learning_rate": 0.0015940000000000001, "loss": 0.353, "step": 798 }, { "epoch": 0.001418466123926066, "grad_norm": 2.28125, "learning_rate": 0.001598, "loss": 0.4482, "step": 800 }, { "epoch": 0.0014220122892358812, "grad_norm": 0.357421875, "learning_rate": 0.0016020000000000001, "loss": 0.5871, "step": 802 }, { "epoch": 0.0014255584545456964, "grad_norm": 0.62109375, "learning_rate": 0.0016060000000000002, "loss": 0.4032, "step": 804 }, { "epoch": 0.0014291046198555116, "grad_norm": 0.7109375, "learning_rate": 0.00161, "loss": 0.3488, "step": 806 }, { "epoch": 0.0014326507851653266, "grad_norm": 1.3828125, "learning_rate": 0.0016140000000000002, "loss": 0.4464, "step": 808 }, { "epoch": 0.0014361969504751418, "grad_norm": 0.70703125, "learning_rate": 0.001618, "loss": 0.427, "step": 810 }, { "epoch": 0.001439743115784957, "grad_norm": 0.796875, "learning_rate": 0.0016220000000000002, "loss": 0.3566, "step": 812 }, { "epoch": 0.0014432892810947722, "grad_norm": 0.3359375, "learning_rate": 0.0016259999999999998, "loss": 0.3749, "step": 814 }, { "epoch": 0.0014468354464045874, "grad_norm": 1.046875, "learning_rate": 0.00163, "loss": 0.4408, "step": 816 }, { "epoch": 0.0014503816117144026, "grad_norm": 0.435546875, "learning_rate": 0.001634, "loss": 0.4197, "step": 818 }, { "epoch": 0.0014539277770242176, "grad_norm": 0.5234375, "learning_rate": 0.001638, "loss": 0.4298, "step": 820 }, { "epoch": 0.0014574739423340328, "grad_norm": 0.59765625, "learning_rate": 0.001642, "loss": 0.3424, "step": 822 }, { "epoch": 0.001461020107643848, "grad_norm": 0.466796875, "learning_rate": 0.001646, "loss": 0.4055, "step": 824 }, { "epoch": 0.0014645662729536632, "grad_norm": 0.3203125, "learning_rate": 0.00165, "loss": 0.387, "step": 826 }, { "epoch": 0.0014681124382634784, "grad_norm": 0.4453125, "learning_rate": 0.0016539999999999999, "loss": 0.4673, "step": 828 }, { "epoch": 0.0014716586035732934, "grad_norm": 1.34375, "learning_rate": 0.001658, "loss": 0.884, "step": 830 }, { "epoch": 0.0014752047688831086, "grad_norm": 0.3828125, "learning_rate": 0.0016619999999999998, "loss": 0.3106, "step": 832 }, { "epoch": 0.0014787509341929238, "grad_norm": 1.09375, "learning_rate": 0.001666, "loss": 0.4596, "step": 834 }, { "epoch": 0.001482297099502739, "grad_norm": 0.875, "learning_rate": 0.00167, "loss": 0.5467, "step": 836 }, { "epoch": 0.0014858432648125542, "grad_norm": 0.68359375, "learning_rate": 0.001674, "loss": 0.3769, "step": 838 }, { "epoch": 0.0014893894301223694, "grad_norm": 0.73828125, "learning_rate": 0.001678, "loss": 0.4614, "step": 840 }, { "epoch": 0.0014929355954321844, "grad_norm": 0.6015625, "learning_rate": 0.001682, "loss": 0.4218, "step": 842 }, { "epoch": 0.0014964817607419996, "grad_norm": 0.98828125, "learning_rate": 0.001686, "loss": 0.3143, "step": 844 }, { "epoch": 0.0015000279260518148, "grad_norm": 1.703125, "learning_rate": 0.0016899999999999999, "loss": 0.6092, "step": 846 }, { "epoch": 0.00150357409136163, "grad_norm": 0.416015625, "learning_rate": 0.001694, "loss": 0.3918, "step": 848 }, { "epoch": 0.0015071202566714452, "grad_norm": 0.47265625, "learning_rate": 0.001698, "loss": 0.3889, "step": 850 }, { "epoch": 0.0015106664219812602, "grad_norm": 0.8515625, "learning_rate": 0.001702, "loss": 0.4317, "step": 852 }, { "epoch": 0.0015142125872910754, "grad_norm": 0.5546875, "learning_rate": 0.001706, "loss": 0.433, "step": 854 }, { "epoch": 0.0015177587526008906, "grad_norm": 0.5, "learning_rate": 0.00171, "loss": 0.402, "step": 856 }, { "epoch": 0.0015213049179107058, "grad_norm": 0.984375, "learning_rate": 0.001714, "loss": 0.3781, "step": 858 }, { "epoch": 0.001524851083220521, "grad_norm": 1.125, "learning_rate": 0.001718, "loss": 0.4915, "step": 860 }, { "epoch": 0.0015283972485303362, "grad_norm": 0.48046875, "learning_rate": 0.001722, "loss": 0.485, "step": 862 }, { "epoch": 0.0015319434138401512, "grad_norm": 0.54296875, "learning_rate": 0.001726, "loss": 0.5497, "step": 864 }, { "epoch": 0.0015354895791499664, "grad_norm": 1.046875, "learning_rate": 0.00173, "loss": 0.4267, "step": 866 }, { "epoch": 0.0015390357444597816, "grad_norm": 2.890625, "learning_rate": 0.001734, "loss": 0.4916, "step": 868 }, { "epoch": 0.0015425819097695968, "grad_norm": 0.7265625, "learning_rate": 0.001738, "loss": 0.4525, "step": 870 }, { "epoch": 0.001546128075079412, "grad_norm": 0.734375, "learning_rate": 0.001742, "loss": 0.466, "step": 872 }, { "epoch": 0.0015496742403892272, "grad_norm": 0.6484375, "learning_rate": 0.001746, "loss": 0.4172, "step": 874 }, { "epoch": 0.0015532204056990422, "grad_norm": 2.1875, "learning_rate": 0.00175, "loss": 0.4612, "step": 876 }, { "epoch": 0.0015567665710088574, "grad_norm": 0.51953125, "learning_rate": 0.0017540000000000001, "loss": 0.3634, "step": 878 }, { "epoch": 0.0015603127363186726, "grad_norm": 0.43359375, "learning_rate": 0.001758, "loss": 0.3293, "step": 880 }, { "epoch": 0.0015638589016284878, "grad_norm": 0.45703125, "learning_rate": 0.0017620000000000001, "loss": 0.3595, "step": 882 }, { "epoch": 0.001567405066938303, "grad_norm": 0.58203125, "learning_rate": 0.001766, "loss": 0.3994, "step": 884 }, { "epoch": 0.001570951232248118, "grad_norm": 0.8359375, "learning_rate": 0.00177, "loss": 0.4265, "step": 886 }, { "epoch": 0.0015744973975579332, "grad_norm": 0.62109375, "learning_rate": 0.001774, "loss": 0.3768, "step": 888 }, { "epoch": 0.0015780435628677484, "grad_norm": 0.75, "learning_rate": 0.001778, "loss": 0.3474, "step": 890 }, { "epoch": 0.0015815897281775636, "grad_norm": 0.337890625, "learning_rate": 0.0017820000000000002, "loss": 0.4295, "step": 892 }, { "epoch": 0.0015851358934873788, "grad_norm": 0.5859375, "learning_rate": 0.001786, "loss": 0.4619, "step": 894 }, { "epoch": 0.001588682058797194, "grad_norm": 2.328125, "learning_rate": 0.0017900000000000001, "loss": 0.3903, "step": 896 }, { "epoch": 0.001592228224107009, "grad_norm": 1.6484375, "learning_rate": 0.001794, "loss": 0.4117, "step": 898 }, { "epoch": 0.0015957743894168242, "grad_norm": 1.0, "learning_rate": 0.0017980000000000001, "loss": 0.4776, "step": 900 }, { "epoch": 0.0015993205547266394, "grad_norm": 0.62890625, "learning_rate": 0.001802, "loss": 0.3239, "step": 902 }, { "epoch": 0.0016028667200364546, "grad_norm": 0.439453125, "learning_rate": 0.001806, "loss": 0.4453, "step": 904 }, { "epoch": 0.0016064128853462698, "grad_norm": 0.421875, "learning_rate": 0.0018100000000000002, "loss": 0.3475, "step": 906 }, { "epoch": 0.001609959050656085, "grad_norm": 4.34375, "learning_rate": 0.001814, "loss": 0.5284, "step": 908 }, { "epoch": 0.0016135052159659, "grad_norm": 0.56640625, "learning_rate": 0.0018180000000000002, "loss": 0.3544, "step": 910 }, { "epoch": 0.0016170513812757152, "grad_norm": 0.8828125, "learning_rate": 0.001822, "loss": 0.3681, "step": 912 }, { "epoch": 0.0016205975465855304, "grad_norm": 1.1953125, "learning_rate": 0.0018260000000000001, "loss": 0.3847, "step": 914 }, { "epoch": 0.0016241437118953456, "grad_norm": 0.68359375, "learning_rate": 0.00183, "loss": 0.5371, "step": 916 }, { "epoch": 0.0016276898772051608, "grad_norm": 0.9453125, "learning_rate": 0.0018340000000000001, "loss": 0.4194, "step": 918 }, { "epoch": 0.0016312360425149758, "grad_norm": 3.125, "learning_rate": 0.0018380000000000002, "loss": 0.6421, "step": 920 }, { "epoch": 0.001634782207824791, "grad_norm": 0.51953125, "learning_rate": 0.001842, "loss": 0.4111, "step": 922 }, { "epoch": 0.0016383283731346062, "grad_norm": 1.5546875, "learning_rate": 0.0018460000000000002, "loss": 0.4762, "step": 924 }, { "epoch": 0.0016418745384444214, "grad_norm": 0.466796875, "learning_rate": 0.00185, "loss": 0.4302, "step": 926 }, { "epoch": 0.0016454207037542366, "grad_norm": 0.5703125, "learning_rate": 0.0018540000000000002, "loss": 0.41, "step": 928 }, { "epoch": 0.0016489668690640518, "grad_norm": 0.404296875, "learning_rate": 0.001858, "loss": 0.5211, "step": 930 }, { "epoch": 0.0016525130343738668, "grad_norm": 0.48828125, "learning_rate": 0.0018620000000000002, "loss": 0.5331, "step": 932 }, { "epoch": 0.001656059199683682, "grad_norm": 1.6484375, "learning_rate": 0.001866, "loss": 0.8214, "step": 934 }, { "epoch": 0.0016596053649934972, "grad_norm": 1.875, "learning_rate": 0.0018700000000000001, "loss": 0.3674, "step": 936 }, { "epoch": 0.0016631515303033124, "grad_norm": 17.0, "learning_rate": 0.0018740000000000002, "loss": 0.5836, "step": 938 }, { "epoch": 0.0016666976956131276, "grad_norm": 1.375, "learning_rate": 0.001878, "loss": 0.4125, "step": 940 }, { "epoch": 0.0016702438609229428, "grad_norm": 0.9140625, "learning_rate": 0.001882, "loss": 0.4659, "step": 942 }, { "epoch": 0.0016737900262327578, "grad_norm": 0.478515625, "learning_rate": 0.0018859999999999999, "loss": 0.38, "step": 944 }, { "epoch": 0.001677336191542573, "grad_norm": 0.6484375, "learning_rate": 0.00189, "loss": 0.4455, "step": 946 }, { "epoch": 0.0016808823568523882, "grad_norm": 0.322265625, "learning_rate": 0.0018939999999999999, "loss": 0.3341, "step": 948 }, { "epoch": 0.0016844285221622034, "grad_norm": 0.392578125, "learning_rate": 0.001898, "loss": 0.4781, "step": 950 }, { "epoch": 0.0016879746874720186, "grad_norm": 0.72265625, "learning_rate": 0.001902, "loss": 0.5476, "step": 952 }, { "epoch": 0.0016915208527818336, "grad_norm": 1.2734375, "learning_rate": 0.001906, "loss": 0.476, "step": 954 }, { "epoch": 0.0016950670180916488, "grad_norm": 0.27734375, "learning_rate": 0.00191, "loss": 0.3611, "step": 956 }, { "epoch": 0.001698613183401464, "grad_norm": 1.859375, "learning_rate": 0.001914, "loss": 0.6158, "step": 958 }, { "epoch": 0.0017021593487112792, "grad_norm": 1.703125, "learning_rate": 0.001918, "loss": 0.4304, "step": 960 }, { "epoch": 0.0017057055140210944, "grad_norm": 0.431640625, "learning_rate": 0.0019219999999999999, "loss": 0.3029, "step": 962 }, { "epoch": 0.0017092516793309096, "grad_norm": 0.63671875, "learning_rate": 0.001926, "loss": 0.5024, "step": 964 }, { "epoch": 0.0017127978446407246, "grad_norm": 10.375, "learning_rate": 0.00193, "loss": 0.4969, "step": 966 }, { "epoch": 0.0017163440099505398, "grad_norm": 0.55859375, "learning_rate": 0.001934, "loss": 0.7104, "step": 968 }, { "epoch": 0.001719890175260355, "grad_norm": 0.57421875, "learning_rate": 0.001938, "loss": 0.5163, "step": 970 }, { "epoch": 0.0017234363405701702, "grad_norm": 0.376953125, "learning_rate": 0.001942, "loss": 0.4006, "step": 972 }, { "epoch": 0.0017269825058799854, "grad_norm": 0.453125, "learning_rate": 0.001946, "loss": 0.3615, "step": 974 }, { "epoch": 0.0017305286711898006, "grad_norm": 0.3828125, "learning_rate": 0.00195, "loss": 0.3554, "step": 976 }, { "epoch": 0.0017340748364996156, "grad_norm": 0.56640625, "learning_rate": 0.001954, "loss": 0.4427, "step": 978 }, { "epoch": 0.0017376210018094308, "grad_norm": 0.9921875, "learning_rate": 0.001958, "loss": 0.4424, "step": 980 }, { "epoch": 0.001741167167119246, "grad_norm": 1.4609375, "learning_rate": 0.001962, "loss": 0.5594, "step": 982 }, { "epoch": 0.0017447133324290612, "grad_norm": 1.0625, "learning_rate": 0.001966, "loss": 0.4296, "step": 984 }, { "epoch": 0.0017482594977388764, "grad_norm": 1.4765625, "learning_rate": 0.00197, "loss": 0.4107, "step": 986 }, { "epoch": 0.0017518056630486914, "grad_norm": 0.3984375, "learning_rate": 0.001974, "loss": 0.38, "step": 988 }, { "epoch": 0.0017553518283585066, "grad_norm": 0.68359375, "learning_rate": 0.001978, "loss": 0.4456, "step": 990 }, { "epoch": 0.0017588979936683218, "grad_norm": 0.33203125, "learning_rate": 0.001982, "loss": 0.4618, "step": 992 }, { "epoch": 0.001762444158978137, "grad_norm": 0.33984375, "learning_rate": 0.001986, "loss": 0.3707, "step": 994 }, { "epoch": 0.0017659903242879522, "grad_norm": 0.255859375, "learning_rate": 0.00199, "loss": 0.3779, "step": 996 }, { "epoch": 0.0017695364895977674, "grad_norm": 0.2734375, "learning_rate": 0.001994, "loss": 0.4004, "step": 998 }, { "epoch": 0.0017730826549075824, "grad_norm": 2.25, "learning_rate": 0.001998, "loss": 0.3643, "step": 1000 }, { "epoch": 0.0017766288202173976, "grad_norm": 0.48046875, "learning_rate": 0.0019999999995079904, "loss": 0.3971, "step": 1002 }, { "epoch": 0.0017801749855272128, "grad_norm": 0.78515625, "learning_rate": 0.0019999999955719132, "loss": 0.3017, "step": 1004 }, { "epoch": 0.001783721150837028, "grad_norm": 0.470703125, "learning_rate": 0.001999999987699759, "loss": 0.3235, "step": 1006 }, { "epoch": 0.0017872673161468432, "grad_norm": 0.90234375, "learning_rate": 0.0019999999758915274, "loss": 0.4187, "step": 1008 }, { "epoch": 0.0017908134814566582, "grad_norm": 1.6484375, "learning_rate": 0.0019999999601472184, "loss": 0.5903, "step": 1010 }, { "epoch": 0.0017943596467664734, "grad_norm": 0.384765625, "learning_rate": 0.0019999999404668326, "loss": 0.6096, "step": 1012 }, { "epoch": 0.0017979058120762886, "grad_norm": 1.3515625, "learning_rate": 0.00199999991685037, "loss": 0.3594, "step": 1014 }, { "epoch": 0.0018014519773861038, "grad_norm": 0.388671875, "learning_rate": 0.001999999889297831, "loss": 0.541, "step": 1016 }, { "epoch": 0.001804998142695919, "grad_norm": 0.40234375, "learning_rate": 0.001999999857809214, "loss": 0.3586, "step": 1018 }, { "epoch": 0.0018085443080057342, "grad_norm": 1.015625, "learning_rate": 0.0019999998223845213, "loss": 0.507, "step": 1020 }, { "epoch": 0.0018120904733155492, "grad_norm": 0.91796875, "learning_rate": 0.001999999783023752, "loss": 0.4054, "step": 1022 }, { "epoch": 0.0018156366386253644, "grad_norm": 0.84375, "learning_rate": 0.0019999997397269066, "loss": 0.3707, "step": 1024 }, { "epoch": 0.0018191828039351796, "grad_norm": 1.8359375, "learning_rate": 0.0019999996924939846, "loss": 0.6278, "step": 1026 }, { "epoch": 0.0018227289692449948, "grad_norm": 0.34765625, "learning_rate": 0.001999999641324987, "loss": 0.3858, "step": 1028 }, { "epoch": 0.00182627513455481, "grad_norm": 0.5859375, "learning_rate": 0.001999999586219914, "loss": 0.3753, "step": 1030 }, { "epoch": 0.0018298212998646252, "grad_norm": 0.6171875, "learning_rate": 0.0019999995271787656, "loss": 0.3808, "step": 1032 }, { "epoch": 0.0018333674651744402, "grad_norm": 0.390625, "learning_rate": 0.0019999994642015415, "loss": 0.2981, "step": 1034 }, { "epoch": 0.0018369136304842554, "grad_norm": 0.734375, "learning_rate": 0.0019999993972882432, "loss": 0.4874, "step": 1036 }, { "epoch": 0.0018404597957940706, "grad_norm": 0.349609375, "learning_rate": 0.00199999932643887, "loss": 0.3889, "step": 1038 }, { "epoch": 0.0018440059611038858, "grad_norm": 0.375, "learning_rate": 0.0019999992516534226, "loss": 0.4473, "step": 1040 }, { "epoch": 0.001847552126413701, "grad_norm": 0.333984375, "learning_rate": 0.001999999172931901, "loss": 0.3383, "step": 1042 }, { "epoch": 0.001851098291723516, "grad_norm": 0.87109375, "learning_rate": 0.001999999090274306, "loss": 0.4581, "step": 1044 }, { "epoch": 0.0018546444570333312, "grad_norm": 0.5234375, "learning_rate": 0.001999999003680638, "loss": 0.3222, "step": 1046 }, { "epoch": 0.0018581906223431464, "grad_norm": 0.62109375, "learning_rate": 0.0019999989131508963, "loss": 0.3686, "step": 1048 }, { "epoch": 0.0018617367876529616, "grad_norm": 0.90234375, "learning_rate": 0.001999998818685083, "loss": 0.5504, "step": 1050 }, { "epoch": 0.0018652829529627768, "grad_norm": 0.3125, "learning_rate": 0.0019999987202831975, "loss": 0.3204, "step": 1052 }, { "epoch": 0.001868829118272592, "grad_norm": 0.361328125, "learning_rate": 0.0019999986179452403, "loss": 0.3595, "step": 1054 }, { "epoch": 0.001872375283582407, "grad_norm": 0.3359375, "learning_rate": 0.0019999985116712117, "loss": 0.3356, "step": 1056 }, { "epoch": 0.0018759214488922222, "grad_norm": 0.40625, "learning_rate": 0.0019999984014611124, "loss": 0.4235, "step": 1058 }, { "epoch": 0.0018794676142020374, "grad_norm": 0.390625, "learning_rate": 0.0019999982873149433, "loss": 0.3783, "step": 1060 }, { "epoch": 0.0018830137795118526, "grad_norm": 0.330078125, "learning_rate": 0.0019999981692327045, "loss": 0.4312, "step": 1062 }, { "epoch": 0.0018865599448216678, "grad_norm": 0.287109375, "learning_rate": 0.001999998047214396, "loss": 0.3751, "step": 1064 }, { "epoch": 0.001890106110131483, "grad_norm": 0.498046875, "learning_rate": 0.0019999979212600187, "loss": 0.4861, "step": 1066 }, { "epoch": 0.001893652275441298, "grad_norm": 0.4921875, "learning_rate": 0.001999997791369574, "loss": 0.4154, "step": 1068 }, { "epoch": 0.0018971984407511132, "grad_norm": 0.392578125, "learning_rate": 0.001999997657543061, "loss": 0.3348, "step": 1070 }, { "epoch": 0.0019007446060609284, "grad_norm": 1.4140625, "learning_rate": 0.0019999975197804816, "loss": 0.5562, "step": 1072 }, { "epoch": 0.0019042907713707436, "grad_norm": 3.984375, "learning_rate": 0.0019999973780818353, "loss": 0.5703, "step": 1074 }, { "epoch": 0.0019078369366805588, "grad_norm": 0.31640625, "learning_rate": 0.0019999972324471235, "loss": 0.3959, "step": 1076 }, { "epoch": 0.0019113831019903738, "grad_norm": 0.43359375, "learning_rate": 0.0019999970828763467, "loss": 0.3247, "step": 1078 }, { "epoch": 0.001914929267300189, "grad_norm": 0.6796875, "learning_rate": 0.0019999969293695054, "loss": 0.439, "step": 1080 }, { "epoch": 0.0019184754326100042, "grad_norm": 0.380859375, "learning_rate": 0.0019999967719266003, "loss": 0.6608, "step": 1082 }, { "epoch": 0.0019220215979198194, "grad_norm": 1.375, "learning_rate": 0.001999996610547632, "loss": 0.4712, "step": 1084 }, { "epoch": 0.0019255677632296346, "grad_norm": 0.8671875, "learning_rate": 0.0019999964452326016, "loss": 0.606, "step": 1086 }, { "epoch": 0.0019291139285394499, "grad_norm": 1.359375, "learning_rate": 0.0019999962759815093, "loss": 0.5051, "step": 1088 }, { "epoch": 0.0019326600938492648, "grad_norm": 0.30078125, "learning_rate": 0.001999996102794356, "loss": 0.3872, "step": 1090 }, { "epoch": 0.00193620625915908, "grad_norm": 0.42578125, "learning_rate": 0.0019999959256711427, "loss": 0.3313, "step": 1092 }, { "epoch": 0.0019397524244688952, "grad_norm": 0.99609375, "learning_rate": 0.0019999957446118696, "loss": 0.4841, "step": 1094 }, { "epoch": 0.0019432985897787104, "grad_norm": 0.92578125, "learning_rate": 0.001999995559616538, "loss": 0.5517, "step": 1096 }, { "epoch": 0.0019468447550885257, "grad_norm": 0.443359375, "learning_rate": 0.0019999953706851493, "loss": 0.3948, "step": 1098 }, { "epoch": 0.0019503909203983409, "grad_norm": 0.451171875, "learning_rate": 0.001999995177817703, "loss": 0.5052, "step": 1100 }, { "epoch": 0.001953937085708156, "grad_norm": 1.3828125, "learning_rate": 0.0019999949810142006, "loss": 0.4857, "step": 1102 }, { "epoch": 0.001957483251017971, "grad_norm": 0.369140625, "learning_rate": 0.0019999947802746432, "loss": 0.3922, "step": 1104 }, { "epoch": 0.001961029416327786, "grad_norm": 2.859375, "learning_rate": 0.0019999945755990313, "loss": 0.4307, "step": 1106 }, { "epoch": 0.0019645755816376015, "grad_norm": 0.62890625, "learning_rate": 0.0019999943669873656, "loss": 0.4164, "step": 1108 }, { "epoch": 0.0019681217469474164, "grad_norm": 0.45703125, "learning_rate": 0.0019999941544396474, "loss": 0.3841, "step": 1110 }, { "epoch": 0.001971667912257232, "grad_norm": 4.0625, "learning_rate": 0.001999993937955878, "loss": 0.5836, "step": 1112 }, { "epoch": 0.001975214077567047, "grad_norm": 0.5625, "learning_rate": 0.0019999937175360577, "loss": 0.3593, "step": 1114 }, { "epoch": 0.0019787602428768623, "grad_norm": 0.380859375, "learning_rate": 0.0019999934931801875, "loss": 0.351, "step": 1116 }, { "epoch": 0.0019823064081866773, "grad_norm": 0.515625, "learning_rate": 0.0019999932648882687, "loss": 0.3285, "step": 1118 }, { "epoch": 0.0019858525734964922, "grad_norm": 0.59375, "learning_rate": 0.001999993032660302, "loss": 0.5993, "step": 1120 }, { "epoch": 0.0019893987388063077, "grad_norm": 0.6875, "learning_rate": 0.0019999927964962885, "loss": 0.3257, "step": 1122 }, { "epoch": 0.0019929449041161226, "grad_norm": 0.6640625, "learning_rate": 0.0019999925563962294, "loss": 0.4925, "step": 1124 }, { "epoch": 0.001996491069425938, "grad_norm": 0.404296875, "learning_rate": 0.001999992312360126, "loss": 0.3838, "step": 1126 }, { "epoch": 0.002000037234735753, "grad_norm": 0.47265625, "learning_rate": 0.001999992064387978, "loss": 0.3545, "step": 1128 }, { "epoch": 0.002003583400045568, "grad_norm": 0.63671875, "learning_rate": 0.0019999918124797883, "loss": 0.4683, "step": 1130 }, { "epoch": 0.0020071295653553835, "grad_norm": 0.76953125, "learning_rate": 0.0019999915566355575, "loss": 0.413, "step": 1132 }, { "epoch": 0.0020106757306651984, "grad_norm": 0.65234375, "learning_rate": 0.0019999912968552856, "loss": 0.4569, "step": 1134 }, { "epoch": 0.002014221895975014, "grad_norm": 0.375, "learning_rate": 0.0019999910331389746, "loss": 0.3477, "step": 1136 }, { "epoch": 0.002017768061284829, "grad_norm": 0.52734375, "learning_rate": 0.001999990765486626, "loss": 0.3038, "step": 1138 }, { "epoch": 0.002021314226594644, "grad_norm": 0.59765625, "learning_rate": 0.00199999049389824, "loss": 0.3498, "step": 1140 }, { "epoch": 0.0020248603919044593, "grad_norm": 0.45703125, "learning_rate": 0.001999990218373819, "loss": 0.563, "step": 1142 }, { "epoch": 0.0020284065572142742, "grad_norm": 0.470703125, "learning_rate": 0.0019999899389133635, "loss": 0.7518, "step": 1144 }, { "epoch": 0.0020319527225240897, "grad_norm": 2.0625, "learning_rate": 0.001999989655516875, "loss": 0.5289, "step": 1146 }, { "epoch": 0.0020354988878339047, "grad_norm": 0.423828125, "learning_rate": 0.001999989368184354, "loss": 0.3241, "step": 1148 }, { "epoch": 0.00203904505314372, "grad_norm": 1.0625, "learning_rate": 0.001999989076915802, "loss": 0.527, "step": 1150 }, { "epoch": 0.002042591218453535, "grad_norm": 1.71875, "learning_rate": 0.001999988781711221, "loss": 0.5642, "step": 1152 }, { "epoch": 0.00204613738376335, "grad_norm": 0.921875, "learning_rate": 0.0019999884825706122, "loss": 0.4935, "step": 1154 }, { "epoch": 0.0020496835490731655, "grad_norm": 0.91796875, "learning_rate": 0.001999988179493976, "loss": 0.3964, "step": 1156 }, { "epoch": 0.0020532297143829805, "grad_norm": 2.359375, "learning_rate": 0.001999987872481314, "loss": 0.4454, "step": 1158 }, { "epoch": 0.002056775879692796, "grad_norm": 0.625, "learning_rate": 0.001999987561532629, "loss": 0.4465, "step": 1160 }, { "epoch": 0.002060322045002611, "grad_norm": 1.0234375, "learning_rate": 0.00199998724664792, "loss": 0.4312, "step": 1162 }, { "epoch": 0.002063868210312426, "grad_norm": 0.427734375, "learning_rate": 0.00199998692782719, "loss": 0.4251, "step": 1164 }, { "epoch": 0.0020674143756222413, "grad_norm": 0.3828125, "learning_rate": 0.00199998660507044, "loss": 0.4131, "step": 1166 }, { "epoch": 0.0020709605409320563, "grad_norm": 0.88671875, "learning_rate": 0.0019999862783776716, "loss": 0.3982, "step": 1168 }, { "epoch": 0.0020745067062418717, "grad_norm": 0.443359375, "learning_rate": 0.0019999859477488856, "loss": 0.3609, "step": 1170 }, { "epoch": 0.0020780528715516867, "grad_norm": 0.447265625, "learning_rate": 0.001999985613184084, "loss": 0.3846, "step": 1172 }, { "epoch": 0.0020815990368615016, "grad_norm": 0.984375, "learning_rate": 0.001999985274683268, "loss": 0.4326, "step": 1174 }, { "epoch": 0.002085145202171317, "grad_norm": 0.26953125, "learning_rate": 0.001999984932246439, "loss": 0.3229, "step": 1176 }, { "epoch": 0.002088691367481132, "grad_norm": 0.298828125, "learning_rate": 0.0019999845858735994, "loss": 0.3905, "step": 1178 }, { "epoch": 0.0020922375327909475, "grad_norm": 0.48828125, "learning_rate": 0.0019999842355647494, "loss": 0.5283, "step": 1180 }, { "epoch": 0.0020957836981007625, "grad_norm": 0.24609375, "learning_rate": 0.001999983881319891, "loss": 0.2918, "step": 1182 }, { "epoch": 0.002099329863410578, "grad_norm": 0.5546875, "learning_rate": 0.0019999835231390263, "loss": 0.3663, "step": 1184 }, { "epoch": 0.002102876028720393, "grad_norm": 0.283203125, "learning_rate": 0.0019999831610221564, "loss": 0.3811, "step": 1186 }, { "epoch": 0.002106422194030208, "grad_norm": 6.90625, "learning_rate": 0.0019999827949692827, "loss": 0.68, "step": 1188 }, { "epoch": 0.0021099683593400233, "grad_norm": 0.7109375, "learning_rate": 0.001999982424980407, "loss": 0.3522, "step": 1190 }, { "epoch": 0.0021135145246498383, "grad_norm": 0.45703125, "learning_rate": 0.0019999820510555313, "loss": 0.3751, "step": 1192 }, { "epoch": 0.0021170606899596537, "grad_norm": 2.09375, "learning_rate": 0.0019999816731946563, "loss": 0.4795, "step": 1194 }, { "epoch": 0.0021206068552694687, "grad_norm": 1.2421875, "learning_rate": 0.0019999812913977844, "loss": 0.3934, "step": 1196 }, { "epoch": 0.0021241530205792837, "grad_norm": 0.38671875, "learning_rate": 0.001999980905664918, "loss": 0.549, "step": 1198 }, { "epoch": 0.002127699185889099, "grad_norm": 1.3125, "learning_rate": 0.001999980515996057, "loss": 0.4363, "step": 1200 }, { "epoch": 0.002131245351198914, "grad_norm": 0.38671875, "learning_rate": 0.001999980122391204, "loss": 0.4437, "step": 1202 }, { "epoch": 0.0021347915165087295, "grad_norm": 0.439453125, "learning_rate": 0.001999979724850361, "loss": 0.3607, "step": 1204 }, { "epoch": 0.0021383376818185445, "grad_norm": 0.345703125, "learning_rate": 0.001999979323373529, "loss": 0.3687, "step": 1206 }, { "epoch": 0.0021418838471283595, "grad_norm": 0.5234375, "learning_rate": 0.001999978917960711, "loss": 0.3193, "step": 1208 }, { "epoch": 0.002145430012438175, "grad_norm": 0.51171875, "learning_rate": 0.0019999785086119073, "loss": 0.4807, "step": 1210 }, { "epoch": 0.00214897617774799, "grad_norm": 0.3359375, "learning_rate": 0.0019999780953271207, "loss": 0.3487, "step": 1212 }, { "epoch": 0.0021525223430578053, "grad_norm": 0.4296875, "learning_rate": 0.0019999776781063523, "loss": 0.4667, "step": 1214 }, { "epoch": 0.0021560685083676203, "grad_norm": 0.26171875, "learning_rate": 0.001999977256949605, "loss": 0.3317, "step": 1216 }, { "epoch": 0.0021596146736774357, "grad_norm": 0.361328125, "learning_rate": 0.001999976831856879, "loss": 0.3814, "step": 1218 }, { "epoch": 0.0021631608389872507, "grad_norm": 0.77734375, "learning_rate": 0.001999976402828178, "loss": 0.3466, "step": 1220 }, { "epoch": 0.0021667070042970657, "grad_norm": 0.62890625, "learning_rate": 0.001999975969863502, "loss": 0.382, "step": 1222 }, { "epoch": 0.002170253169606881, "grad_norm": 0.7109375, "learning_rate": 0.001999975532962855, "loss": 0.5704, "step": 1224 }, { "epoch": 0.002173799334916696, "grad_norm": 3.5625, "learning_rate": 0.0019999750921262374, "loss": 0.6261, "step": 1226 }, { "epoch": 0.0021773455002265115, "grad_norm": 0.921875, "learning_rate": 0.001999974647353651, "loss": 0.408, "step": 1228 }, { "epoch": 0.0021808916655363265, "grad_norm": 0.51953125, "learning_rate": 0.001999974198645099, "loss": 0.3717, "step": 1230 }, { "epoch": 0.0021844378308461415, "grad_norm": 0.287109375, "learning_rate": 0.0019999737460005824, "loss": 0.303, "step": 1232 }, { "epoch": 0.002187983996155957, "grad_norm": 0.62109375, "learning_rate": 0.001999973289420103, "loss": 0.3554, "step": 1234 }, { "epoch": 0.002191530161465772, "grad_norm": 0.326171875, "learning_rate": 0.0019999728289036636, "loss": 0.292, "step": 1236 }, { "epoch": 0.0021950763267755873, "grad_norm": 0.322265625, "learning_rate": 0.0019999723644512656, "loss": 0.3423, "step": 1238 }, { "epoch": 0.0021986224920854023, "grad_norm": 0.72265625, "learning_rate": 0.0019999718960629115, "loss": 0.4001, "step": 1240 }, { "epoch": 0.0022021686573952173, "grad_norm": 0.255859375, "learning_rate": 0.001999971423738603, "loss": 0.4699, "step": 1242 }, { "epoch": 0.0022057148227050327, "grad_norm": 0.306640625, "learning_rate": 0.001999970947478342, "loss": 0.3121, "step": 1244 }, { "epoch": 0.0022092609880148477, "grad_norm": 0.4765625, "learning_rate": 0.0019999704672821307, "loss": 0.3609, "step": 1246 }, { "epoch": 0.002212807153324663, "grad_norm": 0.279296875, "learning_rate": 0.001999969983149972, "loss": 0.3226, "step": 1248 }, { "epoch": 0.002216353318634478, "grad_norm": 0.2734375, "learning_rate": 0.001999969495081867, "loss": 0.3121, "step": 1250 }, { "epoch": 0.0022198994839442935, "grad_norm": 0.408203125, "learning_rate": 0.0019999690030778183, "loss": 0.3878, "step": 1252 }, { "epoch": 0.0022234456492541085, "grad_norm": 0.462890625, "learning_rate": 0.001999968507137828, "loss": 0.3017, "step": 1254 }, { "epoch": 0.0022269918145639235, "grad_norm": 0.69140625, "learning_rate": 0.0019999680072618977, "loss": 0.3342, "step": 1256 }, { "epoch": 0.002230537979873739, "grad_norm": 0.462890625, "learning_rate": 0.001999967503450031, "loss": 0.3295, "step": 1258 }, { "epoch": 0.002234084145183554, "grad_norm": 0.33984375, "learning_rate": 0.0019999669957022283, "loss": 0.3612, "step": 1260 }, { "epoch": 0.0022376303104933693, "grad_norm": 0.404296875, "learning_rate": 0.001999966484018493, "loss": 0.4064, "step": 1262 }, { "epoch": 0.0022411764758031843, "grad_norm": 0.97265625, "learning_rate": 0.0019999659683988275, "loss": 0.3753, "step": 1264 }, { "epoch": 0.0022447226411129993, "grad_norm": 0.5703125, "learning_rate": 0.0019999654488432332, "loss": 0.3581, "step": 1266 }, { "epoch": 0.0022482688064228147, "grad_norm": 0.328125, "learning_rate": 0.0019999649253517127, "loss": 0.3033, "step": 1268 }, { "epoch": 0.0022518149717326297, "grad_norm": 0.59765625, "learning_rate": 0.0019999643979242685, "loss": 0.3, "step": 1270 }, { "epoch": 0.002255361137042445, "grad_norm": 0.7578125, "learning_rate": 0.001999963866560903, "loss": 0.3704, "step": 1272 }, { "epoch": 0.00225890730235226, "grad_norm": 0.310546875, "learning_rate": 0.001999963331261618, "loss": 0.3202, "step": 1274 }, { "epoch": 0.002262453467662075, "grad_norm": 0.83984375, "learning_rate": 0.0019999627920264163, "loss": 0.4499, "step": 1276 }, { "epoch": 0.0022659996329718905, "grad_norm": 1.640625, "learning_rate": 0.0019999622488553, "loss": 0.4349, "step": 1278 }, { "epoch": 0.0022695457982817055, "grad_norm": 0.427734375, "learning_rate": 0.0019999617017482717, "loss": 0.4296, "step": 1280 }, { "epoch": 0.002273091963591521, "grad_norm": 1.0546875, "learning_rate": 0.001999961150705334, "loss": 0.3572, "step": 1282 }, { "epoch": 0.002276638128901336, "grad_norm": 0.515625, "learning_rate": 0.0019999605957264884, "loss": 0.3237, "step": 1284 }, { "epoch": 0.0022801842942111513, "grad_norm": 0.451171875, "learning_rate": 0.0019999600368117384, "loss": 0.3838, "step": 1286 }, { "epoch": 0.0022837304595209663, "grad_norm": 0.466796875, "learning_rate": 0.0019999594739610856, "loss": 0.3879, "step": 1288 }, { "epoch": 0.0022872766248307813, "grad_norm": 0.3046875, "learning_rate": 0.0019999589071745326, "loss": 0.4766, "step": 1290 }, { "epoch": 0.0022908227901405967, "grad_norm": 0.275390625, "learning_rate": 0.001999958336452083, "loss": 0.3154, "step": 1292 }, { "epoch": 0.0022943689554504117, "grad_norm": 0.79296875, "learning_rate": 0.0019999577617937376, "loss": 0.2976, "step": 1294 }, { "epoch": 0.002297915120760227, "grad_norm": 0.255859375, "learning_rate": 0.0019999571831995, "loss": 0.3841, "step": 1296 }, { "epoch": 0.002301461286070042, "grad_norm": 0.56640625, "learning_rate": 0.001999956600669372, "loss": 0.3736, "step": 1298 }, { "epoch": 0.002305007451379857, "grad_norm": 0.8046875, "learning_rate": 0.0019999560142033575, "loss": 0.4563, "step": 1300 }, { "epoch": 0.0023085536166896725, "grad_norm": 0.5078125, "learning_rate": 0.0019999554238014573, "loss": 0.3575, "step": 1302 }, { "epoch": 0.0023120997819994875, "grad_norm": 1.7734375, "learning_rate": 0.0019999548294636752, "loss": 0.465, "step": 1304 }, { "epoch": 0.002315645947309303, "grad_norm": 0.412109375, "learning_rate": 0.0019999542311900133, "loss": 0.3361, "step": 1306 }, { "epoch": 0.002319192112619118, "grad_norm": 0.65234375, "learning_rate": 0.0019999536289804745, "loss": 0.3126, "step": 1308 }, { "epoch": 0.002322738277928933, "grad_norm": 0.44140625, "learning_rate": 0.001999953022835061, "loss": 0.4039, "step": 1310 }, { "epoch": 0.0023262844432387483, "grad_norm": 0.51171875, "learning_rate": 0.001999952412753776, "loss": 0.3648, "step": 1312 }, { "epoch": 0.0023298306085485633, "grad_norm": 0.65625, "learning_rate": 0.0019999517987366222, "loss": 0.3008, "step": 1314 }, { "epoch": 0.0023333767738583787, "grad_norm": 2.546875, "learning_rate": 0.0019999511807836014, "loss": 0.3931, "step": 1316 }, { "epoch": 0.0023369229391681937, "grad_norm": 0.64453125, "learning_rate": 0.0019999505588947173, "loss": 0.3711, "step": 1318 }, { "epoch": 0.002340469104478009, "grad_norm": 0.220703125, "learning_rate": 0.0019999499330699715, "loss": 0.2766, "step": 1320 }, { "epoch": 0.002344015269787824, "grad_norm": 0.349609375, "learning_rate": 0.0019999493033093685, "loss": 0.3488, "step": 1322 }, { "epoch": 0.002347561435097639, "grad_norm": 1.9296875, "learning_rate": 0.001999948669612909, "loss": 0.4454, "step": 1324 }, { "epoch": 0.0023511076004074545, "grad_norm": 0.87109375, "learning_rate": 0.0019999480319805975, "loss": 0.5258, "step": 1326 }, { "epoch": 0.0023546537657172695, "grad_norm": 0.8203125, "learning_rate": 0.001999947390412436, "loss": 0.3523, "step": 1328 }, { "epoch": 0.002358199931027085, "grad_norm": 1.1875, "learning_rate": 0.001999946744908427, "loss": 0.3534, "step": 1330 }, { "epoch": 0.0023617460963369, "grad_norm": 0.56640625, "learning_rate": 0.001999946095468574, "loss": 0.3965, "step": 1332 }, { "epoch": 0.002365292261646715, "grad_norm": 0.86328125, "learning_rate": 0.001999945442092879, "loss": 0.5747, "step": 1334 }, { "epoch": 0.0023688384269565303, "grad_norm": 0.44921875, "learning_rate": 0.001999944784781346, "loss": 0.3435, "step": 1336 }, { "epoch": 0.0023723845922663453, "grad_norm": 1.921875, "learning_rate": 0.001999944123533977, "loss": 0.4215, "step": 1338 }, { "epoch": 0.0023759307575761607, "grad_norm": 1.3671875, "learning_rate": 0.0019999434583507754, "loss": 0.3861, "step": 1340 }, { "epoch": 0.0023794769228859757, "grad_norm": 1.9765625, "learning_rate": 0.0019999427892317435, "loss": 0.4901, "step": 1342 }, { "epoch": 0.0023830230881957907, "grad_norm": 0.44140625, "learning_rate": 0.0019999421161768847, "loss": 0.3331, "step": 1344 }, { "epoch": 0.002386569253505606, "grad_norm": 0.34765625, "learning_rate": 0.001999941439186202, "loss": 0.5539, "step": 1346 }, { "epoch": 0.002390115418815421, "grad_norm": 0.484375, "learning_rate": 0.0019999407582596976, "loss": 0.3413, "step": 1348 }, { "epoch": 0.0023936615841252365, "grad_norm": 1.7578125, "learning_rate": 0.001999940073397376, "loss": 0.46, "step": 1350 }, { "epoch": 0.0023972077494350515, "grad_norm": 1.0078125, "learning_rate": 0.001999939384599239, "loss": 0.5004, "step": 1352 }, { "epoch": 0.002400753914744867, "grad_norm": 0.3828125, "learning_rate": 0.0019999386918652896, "loss": 0.4681, "step": 1354 }, { "epoch": 0.002404300080054682, "grad_norm": 0.431640625, "learning_rate": 0.001999937995195531, "loss": 0.4008, "step": 1356 }, { "epoch": 0.002407846245364497, "grad_norm": 0.39453125, "learning_rate": 0.0019999372945899664, "loss": 0.3076, "step": 1358 }, { "epoch": 0.0024113924106743123, "grad_norm": 0.369140625, "learning_rate": 0.0019999365900485993, "loss": 0.3904, "step": 1360 }, { "epoch": 0.0024149385759841273, "grad_norm": 0.23828125, "learning_rate": 0.0019999358815714317, "loss": 0.3989, "step": 1362 }, { "epoch": 0.0024184847412939427, "grad_norm": 0.5, "learning_rate": 0.0019999351691584676, "loss": 0.3566, "step": 1364 }, { "epoch": 0.0024220309066037577, "grad_norm": 0.66015625, "learning_rate": 0.0019999344528097095, "loss": 0.3267, "step": 1366 }, { "epoch": 0.0024255770719135727, "grad_norm": 0.65234375, "learning_rate": 0.0019999337325251614, "loss": 0.3624, "step": 1368 }, { "epoch": 0.002429123237223388, "grad_norm": 1.296875, "learning_rate": 0.0019999330083048258, "loss": 0.3912, "step": 1370 }, { "epoch": 0.002432669402533203, "grad_norm": 1.078125, "learning_rate": 0.0019999322801487057, "loss": 0.4825, "step": 1372 }, { "epoch": 0.0024362155678430185, "grad_norm": 0.66796875, "learning_rate": 0.0019999315480568047, "loss": 0.3519, "step": 1374 }, { "epoch": 0.0024397617331528335, "grad_norm": 0.6875, "learning_rate": 0.001999930812029126, "loss": 0.4869, "step": 1376 }, { "epoch": 0.0024433078984626485, "grad_norm": 5.1875, "learning_rate": 0.001999930072065672, "loss": 0.6554, "step": 1378 }, { "epoch": 0.002446854063772464, "grad_norm": 0.859375, "learning_rate": 0.0019999293281664473, "loss": 0.4527, "step": 1380 }, { "epoch": 0.002450400229082279, "grad_norm": 0.4140625, "learning_rate": 0.001999928580331454, "loss": 0.4871, "step": 1382 }, { "epoch": 0.0024539463943920943, "grad_norm": 0.255859375, "learning_rate": 0.0019999278285606965, "loss": 0.3651, "step": 1384 }, { "epoch": 0.0024574925597019093, "grad_norm": 0.265625, "learning_rate": 0.0019999270728541766, "loss": 0.3327, "step": 1386 }, { "epoch": 0.0024610387250117243, "grad_norm": 0.59765625, "learning_rate": 0.001999926313211899, "loss": 0.4196, "step": 1388 }, { "epoch": 0.0024645848903215397, "grad_norm": 0.330078125, "learning_rate": 0.0019999255496338663, "loss": 0.3441, "step": 1390 }, { "epoch": 0.0024681310556313547, "grad_norm": 0.61328125, "learning_rate": 0.001999924782120082, "loss": 0.7735, "step": 1392 }, { "epoch": 0.00247167722094117, "grad_norm": 0.19140625, "learning_rate": 0.0019999240106705492, "loss": 0.5843, "step": 1394 }, { "epoch": 0.002475223386250985, "grad_norm": 0.72265625, "learning_rate": 0.0019999232352852715, "loss": 0.4501, "step": 1396 }, { "epoch": 0.0024787695515608005, "grad_norm": 0.453125, "learning_rate": 0.001999922455964253, "loss": 0.4135, "step": 1398 }, { "epoch": 0.0024823157168706155, "grad_norm": 0.478515625, "learning_rate": 0.0019999216727074956, "loss": 0.5371, "step": 1400 }, { "epoch": 0.0024858618821804305, "grad_norm": 1.796875, "learning_rate": 0.001999920885515004, "loss": 0.5403, "step": 1402 }, { "epoch": 0.002489408047490246, "grad_norm": 0.3515625, "learning_rate": 0.0019999200943867806, "loss": 0.4323, "step": 1404 }, { "epoch": 0.002492954212800061, "grad_norm": 0.6875, "learning_rate": 0.00199991929932283, "loss": 0.4454, "step": 1406 }, { "epoch": 0.0024965003781098763, "grad_norm": 0.75390625, "learning_rate": 0.001999918500323155, "loss": 0.339, "step": 1408 }, { "epoch": 0.0025000465434196913, "grad_norm": 0.60546875, "learning_rate": 0.0019999176973877594, "loss": 0.4297, "step": 1410 }, { "epoch": 0.0025035927087295063, "grad_norm": 1.1171875, "learning_rate": 0.001999916890516646, "loss": 0.3729, "step": 1412 }, { "epoch": 0.0025071388740393217, "grad_norm": 1.1875, "learning_rate": 0.0019999160797098195, "loss": 0.346, "step": 1414 }, { "epoch": 0.0025106850393491367, "grad_norm": 0.5859375, "learning_rate": 0.0019999152649672826, "loss": 0.4238, "step": 1416 }, { "epoch": 0.002514231204658952, "grad_norm": 1.1640625, "learning_rate": 0.001999914446289039, "loss": 0.494, "step": 1418 }, { "epoch": 0.002517777369968767, "grad_norm": 0.41796875, "learning_rate": 0.0019999136236750923, "loss": 0.3822, "step": 1420 }, { "epoch": 0.002521323535278582, "grad_norm": 0.46875, "learning_rate": 0.001999912797125446, "loss": 0.4121, "step": 1422 }, { "epoch": 0.0025248697005883975, "grad_norm": 0.56640625, "learning_rate": 0.001999911966640104, "loss": 0.3484, "step": 1424 }, { "epoch": 0.0025284158658982125, "grad_norm": 1.0625, "learning_rate": 0.00199991113221907, "loss": 0.5026, "step": 1426 }, { "epoch": 0.002531962031208028, "grad_norm": 0.97265625, "learning_rate": 0.001999910293862347, "loss": 0.4649, "step": 1428 }, { "epoch": 0.002535508196517843, "grad_norm": 0.458984375, "learning_rate": 0.0019999094515699392, "loss": 0.262, "step": 1430 }, { "epoch": 0.0025390543618276583, "grad_norm": 1.1328125, "learning_rate": 0.0019999086053418503, "loss": 0.3965, "step": 1432 }, { "epoch": 0.0025426005271374733, "grad_norm": 2.046875, "learning_rate": 0.001999907755178084, "loss": 0.5808, "step": 1434 }, { "epoch": 0.0025461466924472883, "grad_norm": 0.56640625, "learning_rate": 0.001999906901078644, "loss": 0.3706, "step": 1436 }, { "epoch": 0.0025496928577571037, "grad_norm": 1.2890625, "learning_rate": 0.0019999060430435337, "loss": 0.309, "step": 1438 }, { "epoch": 0.0025532390230669187, "grad_norm": 0.486328125, "learning_rate": 0.0019999051810727572, "loss": 0.354, "step": 1440 }, { "epoch": 0.002556785188376734, "grad_norm": 0.33984375, "learning_rate": 0.0019999043151663182, "loss": 0.3341, "step": 1442 }, { "epoch": 0.002560331353686549, "grad_norm": 0.796875, "learning_rate": 0.00199990344532422, "loss": 0.3325, "step": 1444 }, { "epoch": 0.002563877518996364, "grad_norm": 1.0703125, "learning_rate": 0.0019999025715464673, "loss": 0.4343, "step": 1446 }, { "epoch": 0.0025674236843061795, "grad_norm": 4.28125, "learning_rate": 0.0019999016938330636, "loss": 0.5948, "step": 1448 }, { "epoch": 0.0025709698496159945, "grad_norm": 0.376953125, "learning_rate": 0.0019999008121840125, "loss": 0.3183, "step": 1450 }, { "epoch": 0.00257451601492581, "grad_norm": 0.482421875, "learning_rate": 0.001999899926599318, "loss": 0.3859, "step": 1452 }, { "epoch": 0.002578062180235625, "grad_norm": 0.388671875, "learning_rate": 0.0019998990370789834, "loss": 0.3776, "step": 1454 }, { "epoch": 0.00258160834554544, "grad_norm": 0.44140625, "learning_rate": 0.0019998981436230136, "loss": 0.3878, "step": 1456 }, { "epoch": 0.0025851545108552553, "grad_norm": 0.65234375, "learning_rate": 0.001999897246231412, "loss": 0.3084, "step": 1458 }, { "epoch": 0.0025887006761650703, "grad_norm": 0.390625, "learning_rate": 0.0019998963449041826, "loss": 0.342, "step": 1460 }, { "epoch": 0.0025922468414748857, "grad_norm": 2.34375, "learning_rate": 0.0019998954396413296, "loss": 0.6345, "step": 1462 }, { "epoch": 0.0025957930067847007, "grad_norm": 0.4609375, "learning_rate": 0.001999894530442856, "loss": 0.3512, "step": 1464 }, { "epoch": 0.002599339172094516, "grad_norm": 0.59765625, "learning_rate": 0.001999893617308767, "loss": 0.3512, "step": 1466 }, { "epoch": 0.002602885337404331, "grad_norm": 0.392578125, "learning_rate": 0.001999892700239066, "loss": 0.2984, "step": 1468 }, { "epoch": 0.002606431502714146, "grad_norm": 0.75390625, "learning_rate": 0.0019998917792337567, "loss": 0.3737, "step": 1470 }, { "epoch": 0.0026099776680239615, "grad_norm": 1.28125, "learning_rate": 0.0019998908542928438, "loss": 0.4438, "step": 1472 }, { "epoch": 0.0026135238333337765, "grad_norm": 0.97265625, "learning_rate": 0.001999889925416331, "loss": 0.3816, "step": 1474 }, { "epoch": 0.002617069998643592, "grad_norm": 0.76171875, "learning_rate": 0.001999888992604222, "loss": 0.335, "step": 1476 }, { "epoch": 0.002620616163953407, "grad_norm": 0.80078125, "learning_rate": 0.0019998880558565217, "loss": 0.4098, "step": 1478 }, { "epoch": 0.002624162329263222, "grad_norm": 0.38671875, "learning_rate": 0.0019998871151732335, "loss": 0.3347, "step": 1480 }, { "epoch": 0.0026277084945730373, "grad_norm": 0.640625, "learning_rate": 0.0019998861705543616, "loss": 0.3202, "step": 1482 }, { "epoch": 0.0026312546598828523, "grad_norm": 1.5859375, "learning_rate": 0.001999885221999911, "loss": 0.4216, "step": 1484 }, { "epoch": 0.0026348008251926677, "grad_norm": 0.8203125, "learning_rate": 0.0019998842695098844, "loss": 0.3309, "step": 1486 }, { "epoch": 0.0026383469905024827, "grad_norm": 0.6015625, "learning_rate": 0.0019998833130842873, "loss": 0.4202, "step": 1488 }, { "epoch": 0.0026418931558122977, "grad_norm": 0.578125, "learning_rate": 0.001999882352723123, "loss": 0.418, "step": 1490 }, { "epoch": 0.002645439321122113, "grad_norm": 0.98828125, "learning_rate": 0.001999881388426396, "loss": 0.4025, "step": 1492 }, { "epoch": 0.002648985486431928, "grad_norm": 1.9296875, "learning_rate": 0.0019998804201941107, "loss": 0.3504, "step": 1494 }, { "epoch": 0.0026525316517417435, "grad_norm": 2.203125, "learning_rate": 0.001999879448026271, "loss": 0.3986, "step": 1496 }, { "epoch": 0.0026560778170515585, "grad_norm": 0.5234375, "learning_rate": 0.0019998784719228815, "loss": 0.3664, "step": 1498 }, { "epoch": 0.002659623982361374, "grad_norm": 0.9296875, "learning_rate": 0.001999877491883946, "loss": 0.4292, "step": 1500 }, { "epoch": 0.002663170147671189, "grad_norm": 0.953125, "learning_rate": 0.0019998765079094695, "loss": 0.4353, "step": 1502 }, { "epoch": 0.002666716312981004, "grad_norm": 2.15625, "learning_rate": 0.0019998755199994553, "loss": 0.4917, "step": 1504 }, { "epoch": 0.0026702624782908193, "grad_norm": 0.412109375, "learning_rate": 0.0019998745281539086, "loss": 0.3018, "step": 1506 }, { "epoch": 0.0026738086436006343, "grad_norm": 0.2890625, "learning_rate": 0.0019998735323728334, "loss": 0.4769, "step": 1508 }, { "epoch": 0.0026773548089104498, "grad_norm": 0.72265625, "learning_rate": 0.001999872532656234, "loss": 0.3584, "step": 1510 }, { "epoch": 0.0026809009742202647, "grad_norm": 5.96875, "learning_rate": 0.0019998715290041147, "loss": 0.3792, "step": 1512 }, { "epoch": 0.0026844471395300797, "grad_norm": 0.6328125, "learning_rate": 0.0019998705214164803, "loss": 0.3475, "step": 1514 }, { "epoch": 0.002687993304839895, "grad_norm": 0.53125, "learning_rate": 0.0019998695098933346, "loss": 0.4474, "step": 1516 }, { "epoch": 0.00269153947014971, "grad_norm": 0.69921875, "learning_rate": 0.0019998684944346826, "loss": 0.3354, "step": 1518 }, { "epoch": 0.0026950856354595256, "grad_norm": 1.4296875, "learning_rate": 0.001999867475040529, "loss": 0.4186, "step": 1520 }, { "epoch": 0.0026986318007693405, "grad_norm": 0.423828125, "learning_rate": 0.001999866451710877, "loss": 0.3646, "step": 1522 }, { "epoch": 0.0027021779660791555, "grad_norm": 1.0546875, "learning_rate": 0.0019998654244457324, "loss": 0.2748, "step": 1524 }, { "epoch": 0.002705724131388971, "grad_norm": 0.59375, "learning_rate": 0.001999864393245099, "loss": 0.4799, "step": 1526 }, { "epoch": 0.002709270296698786, "grad_norm": 0.65234375, "learning_rate": 0.001999863358108981, "loss": 0.4214, "step": 1528 }, { "epoch": 0.0027128164620086014, "grad_norm": 0.435546875, "learning_rate": 0.001999862319037384, "loss": 0.424, "step": 1530 }, { "epoch": 0.0027163626273184163, "grad_norm": 0.9453125, "learning_rate": 0.0019998612760303114, "loss": 0.3688, "step": 1532 }, { "epoch": 0.0027199087926282318, "grad_norm": 0.921875, "learning_rate": 0.0019998602290877687, "loss": 0.4423, "step": 1534 }, { "epoch": 0.0027234549579380467, "grad_norm": 0.69921875, "learning_rate": 0.00199985917820976, "loss": 0.3279, "step": 1536 }, { "epoch": 0.0027270011232478617, "grad_norm": 0.388671875, "learning_rate": 0.0019998581233962895, "loss": 0.3472, "step": 1538 }, { "epoch": 0.002730547288557677, "grad_norm": 0.39453125, "learning_rate": 0.001999857064647363, "loss": 0.2892, "step": 1540 }, { "epoch": 0.002734093453867492, "grad_norm": 0.71484375, "learning_rate": 0.0019998560019629835, "loss": 0.4923, "step": 1542 }, { "epoch": 0.0027376396191773076, "grad_norm": 2.0625, "learning_rate": 0.0019998549353431572, "loss": 0.3521, "step": 1544 }, { "epoch": 0.0027411857844871225, "grad_norm": 1.359375, "learning_rate": 0.0019998538647878882, "loss": 0.4604, "step": 1546 }, { "epoch": 0.0027447319497969375, "grad_norm": 2.28125, "learning_rate": 0.001999852790297181, "loss": 0.4689, "step": 1548 }, { "epoch": 0.002748278115106753, "grad_norm": 4.46875, "learning_rate": 0.00199985171187104, "loss": 0.6894, "step": 1550 }, { "epoch": 0.002751824280416568, "grad_norm": 0.4765625, "learning_rate": 0.0019998506295094707, "loss": 0.3231, "step": 1552 }, { "epoch": 0.0027553704457263834, "grad_norm": 1.7421875, "learning_rate": 0.0019998495432124773, "loss": 0.5036, "step": 1554 }, { "epoch": 0.0027589166110361983, "grad_norm": 0.52734375, "learning_rate": 0.0019998484529800643, "loss": 0.4204, "step": 1556 }, { "epoch": 0.0027624627763460133, "grad_norm": 0.9609375, "learning_rate": 0.0019998473588122376, "loss": 0.4533, "step": 1558 }, { "epoch": 0.0027660089416558288, "grad_norm": 0.80859375, "learning_rate": 0.001999846260709001, "loss": 0.4233, "step": 1560 }, { "epoch": 0.0027695551069656437, "grad_norm": 0.478515625, "learning_rate": 0.001999845158670359, "loss": 0.4601, "step": 1562 }, { "epoch": 0.002773101272275459, "grad_norm": 0.58203125, "learning_rate": 0.0019998440526963175, "loss": 0.3775, "step": 1564 }, { "epoch": 0.002776647437585274, "grad_norm": 0.8984375, "learning_rate": 0.0019998429427868806, "loss": 0.4076, "step": 1566 }, { "epoch": 0.0027801936028950896, "grad_norm": 0.419921875, "learning_rate": 0.0019998418289420535, "loss": 0.3472, "step": 1568 }, { "epoch": 0.0027837397682049046, "grad_norm": 2.078125, "learning_rate": 0.0019998407111618413, "loss": 0.4329, "step": 1570 }, { "epoch": 0.0027872859335147195, "grad_norm": 0.84375, "learning_rate": 0.001999839589446248, "loss": 0.6181, "step": 1572 }, { "epoch": 0.002790832098824535, "grad_norm": 2.671875, "learning_rate": 0.001999838463795279, "loss": 0.4774, "step": 1574 }, { "epoch": 0.00279437826413435, "grad_norm": 2.28125, "learning_rate": 0.0019998373342089396, "loss": 0.4125, "step": 1576 }, { "epoch": 0.0027979244294441654, "grad_norm": 1.4140625, "learning_rate": 0.0019998362006872343, "loss": 0.4688, "step": 1578 }, { "epoch": 0.0028014705947539804, "grad_norm": 0.796875, "learning_rate": 0.001999835063230168, "loss": 0.3709, "step": 1580 }, { "epoch": 0.0028050167600637953, "grad_norm": 0.875, "learning_rate": 0.001999833921837746, "loss": 0.4181, "step": 1582 }, { "epoch": 0.0028085629253736108, "grad_norm": 0.51953125, "learning_rate": 0.0019998327765099726, "loss": 0.3191, "step": 1584 }, { "epoch": 0.0028121090906834257, "grad_norm": 0.38671875, "learning_rate": 0.001999831627246854, "loss": 0.3258, "step": 1586 }, { "epoch": 0.002815655255993241, "grad_norm": 3.21875, "learning_rate": 0.001999830474048394, "loss": 0.6824, "step": 1588 }, { "epoch": 0.002819201421303056, "grad_norm": 0.34375, "learning_rate": 0.0019998293169145986, "loss": 0.3697, "step": 1590 }, { "epoch": 0.002822747586612871, "grad_norm": 0.376953125, "learning_rate": 0.0019998281558454723, "loss": 0.3423, "step": 1592 }, { "epoch": 0.0028262937519226866, "grad_norm": 0.578125, "learning_rate": 0.00199982699084102, "loss": 0.3911, "step": 1594 }, { "epoch": 0.0028298399172325015, "grad_norm": 0.349609375, "learning_rate": 0.0019998258219012474, "loss": 0.4432, "step": 1596 }, { "epoch": 0.002833386082542317, "grad_norm": 1.140625, "learning_rate": 0.0019998246490261595, "loss": 0.4584, "step": 1598 }, { "epoch": 0.002836932247852132, "grad_norm": 0.439453125, "learning_rate": 0.0019998234722157613, "loss": 0.285, "step": 1600 }, { "epoch": 0.0028404784131619474, "grad_norm": 0.87109375, "learning_rate": 0.0019998222914700573, "loss": 0.3189, "step": 1602 }, { "epoch": 0.0028440245784717624, "grad_norm": 0.294921875, "learning_rate": 0.0019998211067890543, "loss": 0.3265, "step": 1604 }, { "epoch": 0.0028475707437815773, "grad_norm": 0.609375, "learning_rate": 0.0019998199181727556, "loss": 0.5614, "step": 1606 }, { "epoch": 0.0028511169090913928, "grad_norm": 0.333984375, "learning_rate": 0.0019998187256211673, "loss": 0.3572, "step": 1608 }, { "epoch": 0.0028546630744012078, "grad_norm": 0.64453125, "learning_rate": 0.001999817529134295, "loss": 0.3555, "step": 1610 }, { "epoch": 0.002858209239711023, "grad_norm": 0.236328125, "learning_rate": 0.001999816328712143, "loss": 0.8434, "step": 1612 }, { "epoch": 0.002861755405020838, "grad_norm": 0.427734375, "learning_rate": 0.0019998151243547178, "loss": 0.3892, "step": 1614 }, { "epoch": 0.002865301570330653, "grad_norm": 0.439453125, "learning_rate": 0.0019998139160620236, "loss": 0.3229, "step": 1616 }, { "epoch": 0.0028688477356404686, "grad_norm": 0.5234375, "learning_rate": 0.001999812703834066, "loss": 0.3374, "step": 1618 }, { "epoch": 0.0028723939009502836, "grad_norm": 0.208984375, "learning_rate": 0.0019998114876708497, "loss": 0.349, "step": 1620 }, { "epoch": 0.002875940066260099, "grad_norm": 0.2578125, "learning_rate": 0.0019998102675723812, "loss": 0.3285, "step": 1622 }, { "epoch": 0.002879486231569914, "grad_norm": 0.39453125, "learning_rate": 0.0019998090435386653, "loss": 0.3325, "step": 1624 }, { "epoch": 0.002883032396879729, "grad_norm": 0.6328125, "learning_rate": 0.001999807815569707, "loss": 0.3392, "step": 1626 }, { "epoch": 0.0028865785621895444, "grad_norm": 2.421875, "learning_rate": 0.0019998065836655124, "loss": 0.3521, "step": 1628 }, { "epoch": 0.0028901247274993594, "grad_norm": 0.97265625, "learning_rate": 0.001999805347826086, "loss": 0.4679, "step": 1630 }, { "epoch": 0.0028936708928091748, "grad_norm": 1.015625, "learning_rate": 0.001999804108051434, "loss": 0.3713, "step": 1632 }, { "epoch": 0.0028972170581189898, "grad_norm": 4.90625, "learning_rate": 0.0019998028643415616, "loss": 0.4651, "step": 1634 }, { "epoch": 0.002900763223428805, "grad_norm": 0.5703125, "learning_rate": 0.0019998016166964737, "loss": 0.4644, "step": 1636 }, { "epoch": 0.00290430938873862, "grad_norm": 0.796875, "learning_rate": 0.001999800365116177, "loss": 0.3905, "step": 1638 }, { "epoch": 0.002907855554048435, "grad_norm": 0.71875, "learning_rate": 0.0019997991096006753, "loss": 0.3075, "step": 1640 }, { "epoch": 0.0029114017193582506, "grad_norm": 0.51953125, "learning_rate": 0.0019997978501499754, "loss": 0.3528, "step": 1642 }, { "epoch": 0.0029149478846680656, "grad_norm": 0.515625, "learning_rate": 0.001999796586764082, "loss": 0.6004, "step": 1644 }, { "epoch": 0.002918494049977881, "grad_norm": 0.56640625, "learning_rate": 0.0019997953194430015, "loss": 0.2975, "step": 1646 }, { "epoch": 0.002922040215287696, "grad_norm": 0.49609375, "learning_rate": 0.0019997940481867385, "loss": 0.3195, "step": 1648 }, { "epoch": 0.002925586380597511, "grad_norm": 0.263671875, "learning_rate": 0.001999792772995299, "loss": 0.3221, "step": 1650 }, { "epoch": 0.0029291325459073264, "grad_norm": 0.37890625, "learning_rate": 0.001999791493868689, "loss": 0.294, "step": 1652 }, { "epoch": 0.0029326787112171414, "grad_norm": 0.47265625, "learning_rate": 0.0019997902108069136, "loss": 0.4062, "step": 1654 }, { "epoch": 0.002936224876526957, "grad_norm": 0.98828125, "learning_rate": 0.001999788923809978, "loss": 0.3383, "step": 1656 }, { "epoch": 0.0029397710418367718, "grad_norm": 0.52734375, "learning_rate": 0.001999787632877889, "loss": 0.4175, "step": 1658 }, { "epoch": 0.0029433172071465868, "grad_norm": 0.58203125, "learning_rate": 0.001999786338010651, "loss": 0.2959, "step": 1660 }, { "epoch": 0.002946863372456402, "grad_norm": 0.423828125, "learning_rate": 0.0019997850392082703, "loss": 0.3044, "step": 1662 }, { "epoch": 0.002950409537766217, "grad_norm": 1.3046875, "learning_rate": 0.0019997837364707526, "loss": 0.4403, "step": 1664 }, { "epoch": 0.0029539557030760326, "grad_norm": 0.4921875, "learning_rate": 0.0019997824297981032, "loss": 0.2723, "step": 1666 }, { "epoch": 0.0029575018683858476, "grad_norm": 0.326171875, "learning_rate": 0.0019997811191903286, "loss": 0.3622, "step": 1668 }, { "epoch": 0.0029610480336956626, "grad_norm": 0.498046875, "learning_rate": 0.0019997798046474335, "loss": 0.3541, "step": 1670 }, { "epoch": 0.002964594199005478, "grad_norm": 0.86328125, "learning_rate": 0.001999778486169425, "loss": 0.2949, "step": 1672 }, { "epoch": 0.002968140364315293, "grad_norm": 1.125, "learning_rate": 0.0019997771637563075, "loss": 0.5068, "step": 1674 }, { "epoch": 0.0029716865296251084, "grad_norm": 0.44140625, "learning_rate": 0.0019997758374080874, "loss": 0.3588, "step": 1676 }, { "epoch": 0.0029752326949349234, "grad_norm": 0.3203125, "learning_rate": 0.0019997745071247703, "loss": 0.3341, "step": 1678 }, { "epoch": 0.002978778860244739, "grad_norm": 0.4453125, "learning_rate": 0.0019997731729063622, "loss": 0.4023, "step": 1680 }, { "epoch": 0.0029823250255545538, "grad_norm": 0.263671875, "learning_rate": 0.0019997718347528693, "loss": 0.3561, "step": 1682 }, { "epoch": 0.0029858711908643688, "grad_norm": 0.65234375, "learning_rate": 0.0019997704926642966, "loss": 0.4754, "step": 1684 }, { "epoch": 0.002989417356174184, "grad_norm": 0.68359375, "learning_rate": 0.0019997691466406503, "loss": 0.435, "step": 1686 }, { "epoch": 0.002992963521483999, "grad_norm": 0.6953125, "learning_rate": 0.001999767796681937, "loss": 0.3984, "step": 1688 }, { "epoch": 0.0029965096867938146, "grad_norm": 0.4296875, "learning_rate": 0.0019997664427881616, "loss": 0.3234, "step": 1690 }, { "epoch": 0.0030000558521036296, "grad_norm": 0.349609375, "learning_rate": 0.0019997650849593305, "loss": 0.3247, "step": 1692 }, { "epoch": 0.0030036020174134446, "grad_norm": 0.302734375, "learning_rate": 0.001999763723195449, "loss": 0.3037, "step": 1694 }, { "epoch": 0.00300714818272326, "grad_norm": 1.0703125, "learning_rate": 0.0019997623574965246, "loss": 0.3523, "step": 1696 }, { "epoch": 0.003010694348033075, "grad_norm": 0.50390625, "learning_rate": 0.001999760987862562, "loss": 0.3611, "step": 1698 }, { "epoch": 0.0030142405133428904, "grad_norm": 0.4296875, "learning_rate": 0.001999759614293567, "loss": 0.285, "step": 1700 }, { "epoch": 0.0030177866786527054, "grad_norm": 0.482421875, "learning_rate": 0.0019997582367895462, "loss": 0.3267, "step": 1702 }, { "epoch": 0.0030213328439625204, "grad_norm": 0.3671875, "learning_rate": 0.0019997568553505062, "loss": 0.3262, "step": 1704 }, { "epoch": 0.003024879009272336, "grad_norm": 0.279296875, "learning_rate": 0.0019997554699764516, "loss": 0.3819, "step": 1706 }, { "epoch": 0.0030284251745821508, "grad_norm": 0.6953125, "learning_rate": 0.0019997540806673897, "loss": 0.4152, "step": 1708 }, { "epoch": 0.003031971339891966, "grad_norm": 0.52734375, "learning_rate": 0.0019997526874233258, "loss": 0.3476, "step": 1710 }, { "epoch": 0.003035517505201781, "grad_norm": 0.7265625, "learning_rate": 0.0019997512902442667, "loss": 0.3321, "step": 1712 }, { "epoch": 0.0030390636705115966, "grad_norm": 0.84375, "learning_rate": 0.0019997498891302177, "loss": 0.5933, "step": 1714 }, { "epoch": 0.0030426098358214116, "grad_norm": 0.3984375, "learning_rate": 0.001999748484081185, "loss": 0.3428, "step": 1716 }, { "epoch": 0.0030461560011312266, "grad_norm": 0.94140625, "learning_rate": 0.0019997470750971755, "loss": 0.5058, "step": 1718 }, { "epoch": 0.003049702166441042, "grad_norm": 0.427734375, "learning_rate": 0.001999745662178195, "loss": 0.2842, "step": 1720 }, { "epoch": 0.003053248331750857, "grad_norm": 0.6484375, "learning_rate": 0.0019997442453242495, "loss": 0.4308, "step": 1722 }, { "epoch": 0.0030567944970606724, "grad_norm": 0.5234375, "learning_rate": 0.0019997428245353455, "loss": 0.4701, "step": 1724 }, { "epoch": 0.0030603406623704874, "grad_norm": 0.79296875, "learning_rate": 0.001999741399811489, "loss": 0.4284, "step": 1726 }, { "epoch": 0.0030638868276803024, "grad_norm": 0.39453125, "learning_rate": 0.001999739971152686, "loss": 0.3371, "step": 1728 }, { "epoch": 0.003067432992990118, "grad_norm": 0.40625, "learning_rate": 0.001999738538558943, "loss": 0.3227, "step": 1730 }, { "epoch": 0.0030709791582999328, "grad_norm": 0.54296875, "learning_rate": 0.0019997371020302663, "loss": 0.404, "step": 1732 }, { "epoch": 0.003074525323609748, "grad_norm": 0.310546875, "learning_rate": 0.001999735661566662, "loss": 0.331, "step": 1734 }, { "epoch": 0.003078071488919563, "grad_norm": 0.435546875, "learning_rate": 0.0019997342171681367, "loss": 0.3667, "step": 1736 }, { "epoch": 0.003081617654229378, "grad_norm": 2.296875, "learning_rate": 0.0019997327688346966, "loss": 0.3231, "step": 1738 }, { "epoch": 0.0030851638195391936, "grad_norm": 1.6640625, "learning_rate": 0.001999731316566348, "loss": 0.5626, "step": 1740 }, { "epoch": 0.0030887099848490086, "grad_norm": 0.359375, "learning_rate": 0.001999729860363097, "loss": 0.3205, "step": 1742 }, { "epoch": 0.003092256150158824, "grad_norm": 0.84375, "learning_rate": 0.001999728400224951, "loss": 0.4312, "step": 1744 }, { "epoch": 0.003095802315468639, "grad_norm": 0.4453125, "learning_rate": 0.0019997269361519145, "loss": 0.4246, "step": 1746 }, { "epoch": 0.0030993484807784544, "grad_norm": 1.84375, "learning_rate": 0.0019997254681439957, "loss": 0.5008, "step": 1748 }, { "epoch": 0.0031028946460882694, "grad_norm": 0.34375, "learning_rate": 0.0019997239962012, "loss": 0.3183, "step": 1750 }, { "epoch": 0.0031064408113980844, "grad_norm": 0.287109375, "learning_rate": 0.0019997225203235345, "loss": 0.3441, "step": 1752 }, { "epoch": 0.0031099869767079, "grad_norm": 3.25, "learning_rate": 0.001999721040511005, "loss": 0.3552, "step": 1754 }, { "epoch": 0.003113533142017715, "grad_norm": 0.9375, "learning_rate": 0.0019997195567636188, "loss": 0.3398, "step": 1756 }, { "epoch": 0.00311707930732753, "grad_norm": 0.365234375, "learning_rate": 0.0019997180690813814, "loss": 0.3092, "step": 1758 }, { "epoch": 0.003120625472637345, "grad_norm": 0.365234375, "learning_rate": 0.0019997165774643, "loss": 0.3321, "step": 1760 }, { "epoch": 0.00312417163794716, "grad_norm": 0.6640625, "learning_rate": 0.0019997150819123805, "loss": 0.3145, "step": 1762 }, { "epoch": 0.0031277178032569756, "grad_norm": 0.462890625, "learning_rate": 0.0019997135824256305, "loss": 0.3727, "step": 1764 }, { "epoch": 0.0031312639685667906, "grad_norm": 0.5234375, "learning_rate": 0.0019997120790040553, "loss": 0.3252, "step": 1766 }, { "epoch": 0.003134810133876606, "grad_norm": 0.396484375, "learning_rate": 0.0019997105716476624, "loss": 0.3073, "step": 1768 }, { "epoch": 0.003138356299186421, "grad_norm": 1.296875, "learning_rate": 0.0019997090603564582, "loss": 0.4655, "step": 1770 }, { "epoch": 0.003141902464496236, "grad_norm": 0.478515625, "learning_rate": 0.001999707545130449, "loss": 0.2859, "step": 1772 }, { "epoch": 0.0031454486298060514, "grad_norm": 4.625, "learning_rate": 0.0019997060259696418, "loss": 0.318, "step": 1774 }, { "epoch": 0.0031489947951158664, "grad_norm": 0.703125, "learning_rate": 0.0019997045028740425, "loss": 0.3593, "step": 1776 }, { "epoch": 0.003152540960425682, "grad_norm": 1.8359375, "learning_rate": 0.001999702975843659, "loss": 0.3372, "step": 1778 }, { "epoch": 0.003156087125735497, "grad_norm": 0.61328125, "learning_rate": 0.001999701444878497, "loss": 0.362, "step": 1780 }, { "epoch": 0.003159633291045312, "grad_norm": 1.25, "learning_rate": 0.001999699909978563, "loss": 0.491, "step": 1782 }, { "epoch": 0.003163179456355127, "grad_norm": 0.6640625, "learning_rate": 0.001999698371143865, "loss": 0.3574, "step": 1784 }, { "epoch": 0.003166725621664942, "grad_norm": 8.1875, "learning_rate": 0.0019996968283744086, "loss": 0.44, "step": 1786 }, { "epoch": 0.0031702717869747576, "grad_norm": 2.765625, "learning_rate": 0.0019996952816702007, "loss": 0.4284, "step": 1788 }, { "epoch": 0.0031738179522845726, "grad_norm": 0.47265625, "learning_rate": 0.0019996937310312484, "loss": 0.3777, "step": 1790 }, { "epoch": 0.003177364117594388, "grad_norm": 0.322265625, "learning_rate": 0.0019996921764575586, "loss": 0.3025, "step": 1792 }, { "epoch": 0.003180910282904203, "grad_norm": 0.6875, "learning_rate": 0.0019996906179491373, "loss": 0.3498, "step": 1794 }, { "epoch": 0.003184456448214018, "grad_norm": 0.98046875, "learning_rate": 0.001999689055505992, "loss": 0.2658, "step": 1796 }, { "epoch": 0.0031880026135238334, "grad_norm": 0.408203125, "learning_rate": 0.00199968748912813, "loss": 0.3541, "step": 1798 }, { "epoch": 0.0031915487788336484, "grad_norm": 1.0703125, "learning_rate": 0.001999685918815557, "loss": 0.3026, "step": 1800 }, { "epoch": 0.003195094944143464, "grad_norm": 0.27734375, "learning_rate": 0.00199968434456828, "loss": 0.3607, "step": 1802 }, { "epoch": 0.003198641109453279, "grad_norm": 0.75390625, "learning_rate": 0.001999682766386307, "loss": 0.8249, "step": 1804 }, { "epoch": 0.003202187274763094, "grad_norm": 0.240234375, "learning_rate": 0.001999681184269644, "loss": 0.3065, "step": 1806 }, { "epoch": 0.003205733440072909, "grad_norm": 0.515625, "learning_rate": 0.0019996795982182977, "loss": 0.31, "step": 1808 }, { "epoch": 0.003209279605382724, "grad_norm": 1.1171875, "learning_rate": 0.0019996780082322755, "loss": 0.4118, "step": 1810 }, { "epoch": 0.0032128257706925396, "grad_norm": 0.357421875, "learning_rate": 0.0019996764143115848, "loss": 0.3238, "step": 1812 }, { "epoch": 0.0032163719360023546, "grad_norm": 0.359375, "learning_rate": 0.0019996748164562315, "loss": 0.3518, "step": 1814 }, { "epoch": 0.00321991810131217, "grad_norm": 1.1484375, "learning_rate": 0.0019996732146662236, "loss": 0.4987, "step": 1816 }, { "epoch": 0.003223464266621985, "grad_norm": 0.48828125, "learning_rate": 0.001999671608941567, "loss": 0.3448, "step": 1818 }, { "epoch": 0.0032270104319318, "grad_norm": 0.26171875, "learning_rate": 0.00199966999928227, "loss": 0.3469, "step": 1820 }, { "epoch": 0.0032305565972416154, "grad_norm": 1.0234375, "learning_rate": 0.001999668385688339, "loss": 0.356, "step": 1822 }, { "epoch": 0.0032341027625514304, "grad_norm": 0.375, "learning_rate": 0.0019996667681597808, "loss": 0.3797, "step": 1824 }, { "epoch": 0.003237648927861246, "grad_norm": 0.4140625, "learning_rate": 0.0019996651466966026, "loss": 0.3039, "step": 1826 }, { "epoch": 0.003241195093171061, "grad_norm": 4.25, "learning_rate": 0.001999663521298812, "loss": 0.3366, "step": 1828 }, { "epoch": 0.003244741258480876, "grad_norm": 0.30078125, "learning_rate": 0.001999661891966416, "loss": 0.3459, "step": 1830 }, { "epoch": 0.0032482874237906912, "grad_norm": 0.56640625, "learning_rate": 0.0019996602586994207, "loss": 0.3181, "step": 1832 }, { "epoch": 0.003251833589100506, "grad_norm": 0.36328125, "learning_rate": 0.0019996586214978346, "loss": 0.2824, "step": 1834 }, { "epoch": 0.0032553797544103216, "grad_norm": 0.28125, "learning_rate": 0.001999656980361664, "loss": 0.2619, "step": 1836 }, { "epoch": 0.0032589259197201366, "grad_norm": 4.125, "learning_rate": 0.0019996553352909165, "loss": 0.5336, "step": 1838 }, { "epoch": 0.0032624720850299516, "grad_norm": 0.37109375, "learning_rate": 0.0019996536862855988, "loss": 0.2924, "step": 1840 }, { "epoch": 0.003266018250339767, "grad_norm": 0.7578125, "learning_rate": 0.0019996520333457187, "loss": 0.3617, "step": 1842 }, { "epoch": 0.003269564415649582, "grad_norm": 0.8046875, "learning_rate": 0.0019996503764712834, "loss": 0.3407, "step": 1844 }, { "epoch": 0.0032731105809593974, "grad_norm": 0.66015625, "learning_rate": 0.0019996487156622995, "loss": 0.3611, "step": 1846 }, { "epoch": 0.0032766567462692124, "grad_norm": 0.41015625, "learning_rate": 0.001999647050918775, "loss": 0.3965, "step": 1848 }, { "epoch": 0.003280202911579028, "grad_norm": 0.55859375, "learning_rate": 0.001999645382240717, "loss": 0.392, "step": 1850 }, { "epoch": 0.003283749076888843, "grad_norm": 0.67578125, "learning_rate": 0.0019996437096281325, "loss": 0.2987, "step": 1852 }, { "epoch": 0.003287295242198658, "grad_norm": 0.373046875, "learning_rate": 0.0019996420330810286, "loss": 0.374, "step": 1854 }, { "epoch": 0.0032908414075084732, "grad_norm": 0.361328125, "learning_rate": 0.0019996403525994137, "loss": 0.3721, "step": 1856 }, { "epoch": 0.003294387572818288, "grad_norm": 1.375, "learning_rate": 0.001999638668183294, "loss": 0.269, "step": 1858 }, { "epoch": 0.0032979337381281036, "grad_norm": 0.484375, "learning_rate": 0.0019996369798326776, "loss": 0.397, "step": 1860 }, { "epoch": 0.0033014799034379186, "grad_norm": 3.0, "learning_rate": 0.0019996352875475717, "loss": 0.3565, "step": 1862 }, { "epoch": 0.0033050260687477336, "grad_norm": 0.61328125, "learning_rate": 0.0019996335913279833, "loss": 0.3263, "step": 1864 }, { "epoch": 0.003308572234057549, "grad_norm": 0.58984375, "learning_rate": 0.0019996318911739202, "loss": 0.3105, "step": 1866 }, { "epoch": 0.003312118399367364, "grad_norm": 0.65234375, "learning_rate": 0.00199963018708539, "loss": 0.3022, "step": 1868 }, { "epoch": 0.0033156645646771794, "grad_norm": 0.5234375, "learning_rate": 0.0019996284790624, "loss": 0.3436, "step": 1870 }, { "epoch": 0.0033192107299869944, "grad_norm": 0.62890625, "learning_rate": 0.0019996267671049573, "loss": 0.3547, "step": 1872 }, { "epoch": 0.0033227568952968094, "grad_norm": 1.140625, "learning_rate": 0.00199962505121307, "loss": 0.4605, "step": 1874 }, { "epoch": 0.003326303060606625, "grad_norm": 1.15625, "learning_rate": 0.0019996233313867447, "loss": 0.3832, "step": 1876 }, { "epoch": 0.00332984922591644, "grad_norm": 0.302734375, "learning_rate": 0.00199962160762599, "loss": 0.3048, "step": 1878 }, { "epoch": 0.0033333953912262552, "grad_norm": 0.53515625, "learning_rate": 0.001999619879930813, "loss": 0.3729, "step": 1880 }, { "epoch": 0.0033369415565360702, "grad_norm": 0.84375, "learning_rate": 0.001999618148301221, "loss": 0.3066, "step": 1882 }, { "epoch": 0.0033404877218458856, "grad_norm": 0.64453125, "learning_rate": 0.0019996164127372216, "loss": 0.367, "step": 1884 }, { "epoch": 0.0033440338871557006, "grad_norm": 0.326171875, "learning_rate": 0.0019996146732388226, "loss": 0.3087, "step": 1886 }, { "epoch": 0.0033475800524655156, "grad_norm": 0.39453125, "learning_rate": 0.001999612929806032, "loss": 0.3728, "step": 1888 }, { "epoch": 0.003351126217775331, "grad_norm": 1.8125, "learning_rate": 0.0019996111824388567, "loss": 0.378, "step": 1890 }, { "epoch": 0.003354672383085146, "grad_norm": 0.5234375, "learning_rate": 0.001999609431137305, "loss": 0.3537, "step": 1892 }, { "epoch": 0.0033582185483949614, "grad_norm": 0.75390625, "learning_rate": 0.0019996076759013835, "loss": 0.3804, "step": 1894 }, { "epoch": 0.0033617647137047764, "grad_norm": 0.609375, "learning_rate": 0.0019996059167311014, "loss": 0.2966, "step": 1896 }, { "epoch": 0.0033653108790145914, "grad_norm": 0.2890625, "learning_rate": 0.001999604153626465, "loss": 0.4085, "step": 1898 }, { "epoch": 0.003368857044324407, "grad_norm": 0.734375, "learning_rate": 0.0019996023865874826, "loss": 0.3374, "step": 1900 }, { "epoch": 0.003372403209634222, "grad_norm": 1.2109375, "learning_rate": 0.0019996006156141623, "loss": 0.3316, "step": 1902 }, { "epoch": 0.0033759493749440372, "grad_norm": 0.392578125, "learning_rate": 0.001999598840706511, "loss": 0.356, "step": 1904 }, { "epoch": 0.0033794955402538522, "grad_norm": 0.265625, "learning_rate": 0.0019995970618645375, "loss": 0.3072, "step": 1906 }, { "epoch": 0.003383041705563667, "grad_norm": 2.484375, "learning_rate": 0.0019995952790882484, "loss": 0.2967, "step": 1908 }, { "epoch": 0.0033865878708734826, "grad_norm": 0.3984375, "learning_rate": 0.0019995934923776526, "loss": 0.2774, "step": 1910 }, { "epoch": 0.0033901340361832976, "grad_norm": 1.7578125, "learning_rate": 0.0019995917017327568, "loss": 0.566, "step": 1912 }, { "epoch": 0.003393680201493113, "grad_norm": 0.98046875, "learning_rate": 0.00199958990715357, "loss": 0.3883, "step": 1914 }, { "epoch": 0.003397226366802928, "grad_norm": 0.734375, "learning_rate": 0.001999588108640099, "loss": 0.3311, "step": 1916 }, { "epoch": 0.0034007725321127434, "grad_norm": 0.53515625, "learning_rate": 0.0019995863061923525, "loss": 0.3409, "step": 1918 }, { "epoch": 0.0034043186974225584, "grad_norm": 11.0, "learning_rate": 0.001999584499810338, "loss": 0.3615, "step": 1920 }, { "epoch": 0.0034078648627323734, "grad_norm": 0.484375, "learning_rate": 0.0019995826894940636, "loss": 0.3205, "step": 1922 }, { "epoch": 0.003411411028042189, "grad_norm": 4.96875, "learning_rate": 0.001999580875243537, "loss": 0.5128, "step": 1924 }, { "epoch": 0.003414957193352004, "grad_norm": 0.90234375, "learning_rate": 0.0019995790570587657, "loss": 0.5527, "step": 1926 }, { "epoch": 0.0034185033586618192, "grad_norm": 0.52734375, "learning_rate": 0.0019995772349397584, "loss": 0.2791, "step": 1928 }, { "epoch": 0.0034220495239716342, "grad_norm": 0.62109375, "learning_rate": 0.001999575408886523, "loss": 0.2989, "step": 1930 }, { "epoch": 0.0034255956892814492, "grad_norm": 1.0078125, "learning_rate": 0.0019995735788990673, "loss": 0.3708, "step": 1932 }, { "epoch": 0.0034291418545912646, "grad_norm": 0.4375, "learning_rate": 0.0019995717449773995, "loss": 0.5294, "step": 1934 }, { "epoch": 0.0034326880199010796, "grad_norm": 1.3046875, "learning_rate": 0.001999569907121527, "loss": 0.5341, "step": 1936 }, { "epoch": 0.003436234185210895, "grad_norm": 4.8125, "learning_rate": 0.0019995680653314583, "loss": 0.5079, "step": 1938 }, { "epoch": 0.00343978035052071, "grad_norm": 0.796875, "learning_rate": 0.0019995662196072017, "loss": 0.336, "step": 1940 }, { "epoch": 0.003443326515830525, "grad_norm": 0.40625, "learning_rate": 0.001999564369948765, "loss": 0.3488, "step": 1942 }, { "epoch": 0.0034468726811403404, "grad_norm": 0.3984375, "learning_rate": 0.0019995625163561556, "loss": 0.3103, "step": 1944 }, { "epoch": 0.0034504188464501554, "grad_norm": 0.484375, "learning_rate": 0.001999560658829383, "loss": 0.277, "step": 1946 }, { "epoch": 0.003453965011759971, "grad_norm": 1.7109375, "learning_rate": 0.001999558797368454, "loss": 0.3605, "step": 1948 }, { "epoch": 0.003457511177069786, "grad_norm": 0.515625, "learning_rate": 0.001999556931973378, "loss": 0.3346, "step": 1950 }, { "epoch": 0.0034610573423796013, "grad_norm": 6.0625, "learning_rate": 0.001999555062644162, "loss": 0.4588, "step": 1952 }, { "epoch": 0.0034646035076894162, "grad_norm": 1.171875, "learning_rate": 0.001999553189380815, "loss": 0.2776, "step": 1954 }, { "epoch": 0.0034681496729992312, "grad_norm": 1.9296875, "learning_rate": 0.0019995513121833447, "loss": 0.3906, "step": 1956 }, { "epoch": 0.0034716958383090466, "grad_norm": 0.462890625, "learning_rate": 0.0019995494310517596, "loss": 0.3159, "step": 1958 }, { "epoch": 0.0034752420036188616, "grad_norm": 0.26953125, "learning_rate": 0.001999547545986068, "loss": 0.2552, "step": 1960 }, { "epoch": 0.003478788168928677, "grad_norm": 1.1015625, "learning_rate": 0.001999545656986277, "loss": 0.3181, "step": 1962 }, { "epoch": 0.003482334334238492, "grad_norm": 0.494140625, "learning_rate": 0.0019995437640523968, "loss": 0.3241, "step": 1964 }, { "epoch": 0.003485880499548307, "grad_norm": 0.52734375, "learning_rate": 0.0019995418671844346, "loss": 0.4052, "step": 1966 }, { "epoch": 0.0034894266648581224, "grad_norm": 1.4609375, "learning_rate": 0.0019995399663823984, "loss": 0.3428, "step": 1968 }, { "epoch": 0.0034929728301679374, "grad_norm": 1.1796875, "learning_rate": 0.0019995380616462974, "loss": 0.3386, "step": 1970 }, { "epoch": 0.003496518995477753, "grad_norm": 0.55859375, "learning_rate": 0.001999536152976139, "loss": 0.2762, "step": 1972 }, { "epoch": 0.003500065160787568, "grad_norm": 0.5078125, "learning_rate": 0.0019995342403719323, "loss": 0.3334, "step": 1974 }, { "epoch": 0.003503611326097383, "grad_norm": 1.171875, "learning_rate": 0.0019995323238336847, "loss": 0.3141, "step": 1976 }, { "epoch": 0.0035071574914071982, "grad_norm": 0.81640625, "learning_rate": 0.001999530403361406, "loss": 0.4356, "step": 1978 }, { "epoch": 0.0035107036567170132, "grad_norm": 0.326171875, "learning_rate": 0.0019995284789551034, "loss": 0.3265, "step": 1980 }, { "epoch": 0.0035142498220268287, "grad_norm": 0.498046875, "learning_rate": 0.001999526550614786, "loss": 0.6316, "step": 1982 }, { "epoch": 0.0035177959873366436, "grad_norm": 0.34375, "learning_rate": 0.0019995246183404617, "loss": 0.322, "step": 1984 }, { "epoch": 0.0035213421526464586, "grad_norm": 1.4375, "learning_rate": 0.001999522682132139, "loss": 0.3813, "step": 1986 }, { "epoch": 0.003524888317956274, "grad_norm": 1.2890625, "learning_rate": 0.0019995207419898275, "loss": 0.2774, "step": 1988 }, { "epoch": 0.003528434483266089, "grad_norm": 3.953125, "learning_rate": 0.001999518797913534, "loss": 0.6197, "step": 1990 }, { "epoch": 0.0035319806485759045, "grad_norm": 0.3125, "learning_rate": 0.001999516849903268, "loss": 0.2728, "step": 1992 }, { "epoch": 0.0035355268138857194, "grad_norm": 0.314453125, "learning_rate": 0.001999514897959038, "loss": 0.3735, "step": 1994 }, { "epoch": 0.003539072979195535, "grad_norm": 3.046875, "learning_rate": 0.001999512942080852, "loss": 0.3896, "step": 1996 }, { "epoch": 0.00354261914450535, "grad_norm": 0.6328125, "learning_rate": 0.001999510982268719, "loss": 0.4228, "step": 1998 }, { "epoch": 0.003546165309815165, "grad_norm": 0.9375, "learning_rate": 0.0019995090185226474, "loss": 0.3514, "step": 2000 }, { "epoch": 0.0035497114751249803, "grad_norm": 0.2890625, "learning_rate": 0.0019995070508426463, "loss": 0.2548, "step": 2002 }, { "epoch": 0.0035532576404347952, "grad_norm": 0.92578125, "learning_rate": 0.0019995050792287234, "loss": 0.3597, "step": 2004 }, { "epoch": 0.0035568038057446107, "grad_norm": 0.6328125, "learning_rate": 0.001999503103680888, "loss": 0.2934, "step": 2006 }, { "epoch": 0.0035603499710544257, "grad_norm": 0.52734375, "learning_rate": 0.0019995011241991482, "loss": 0.363, "step": 2008 }, { "epoch": 0.0035638961363642406, "grad_norm": 0.875, "learning_rate": 0.0019994991407835134, "loss": 0.4091, "step": 2010 }, { "epoch": 0.003567442301674056, "grad_norm": 6.375, "learning_rate": 0.001999497153433991, "loss": 0.4613, "step": 2012 }, { "epoch": 0.003570988466983871, "grad_norm": 0.60546875, "learning_rate": 0.001999495162150591, "loss": 0.2809, "step": 2014 }, { "epoch": 0.0035745346322936865, "grad_norm": 0.458984375, "learning_rate": 0.0019994931669333224, "loss": 0.3114, "step": 2016 }, { "epoch": 0.0035780807976035015, "grad_norm": 0.376953125, "learning_rate": 0.0019994911677821926, "loss": 0.3755, "step": 2018 }, { "epoch": 0.0035816269629133164, "grad_norm": 1.703125, "learning_rate": 0.001999489164697211, "loss": 0.359, "step": 2020 }, { "epoch": 0.003585173128223132, "grad_norm": 0.73828125, "learning_rate": 0.001999487157678386, "loss": 0.4394, "step": 2022 }, { "epoch": 0.003588719293532947, "grad_norm": 0.5625, "learning_rate": 0.0019994851467257267, "loss": 0.368, "step": 2024 }, { "epoch": 0.0035922654588427623, "grad_norm": 0.9453125, "learning_rate": 0.001999483131839242, "loss": 0.5545, "step": 2026 }, { "epoch": 0.0035958116241525773, "grad_norm": 0.8359375, "learning_rate": 0.0019994811130189406, "loss": 0.3449, "step": 2028 }, { "epoch": 0.0035993577894623927, "grad_norm": 1.28125, "learning_rate": 0.0019994790902648314, "loss": 0.2781, "step": 2030 }, { "epoch": 0.0036029039547722077, "grad_norm": 1.296875, "learning_rate": 0.0019994770635769227, "loss": 0.3364, "step": 2032 }, { "epoch": 0.0036064501200820226, "grad_norm": 2.8125, "learning_rate": 0.001999475032955224, "loss": 0.4186, "step": 2034 }, { "epoch": 0.003609996285391838, "grad_norm": 1.2109375, "learning_rate": 0.0019994729983997444, "loss": 0.3297, "step": 2036 }, { "epoch": 0.003613542450701653, "grad_norm": 0.52734375, "learning_rate": 0.001999470959910492, "loss": 0.4499, "step": 2038 }, { "epoch": 0.0036170886160114685, "grad_norm": 1.0703125, "learning_rate": 0.001999468917487476, "loss": 0.3226, "step": 2040 }, { "epoch": 0.0036206347813212835, "grad_norm": 1.5390625, "learning_rate": 0.001999466871130706, "loss": 0.3072, "step": 2042 }, { "epoch": 0.0036241809466310984, "grad_norm": 1.6875, "learning_rate": 0.0019994648208401895, "loss": 0.4056, "step": 2044 }, { "epoch": 0.003627727111940914, "grad_norm": 0.453125, "learning_rate": 0.001999462766615937, "loss": 0.3269, "step": 2046 }, { "epoch": 0.003631273277250729, "grad_norm": 1.3671875, "learning_rate": 0.001999460708457956, "loss": 0.3396, "step": 2048 }, { "epoch": 0.0036348194425605443, "grad_norm": 0.4375, "learning_rate": 0.0019994586463662574, "loss": 0.347, "step": 2050 }, { "epoch": 0.0036383656078703593, "grad_norm": 0.388671875, "learning_rate": 0.0019994565803408484, "loss": 0.3198, "step": 2052 }, { "epoch": 0.0036419117731801742, "grad_norm": 1.0703125, "learning_rate": 0.0019994545103817394, "loss": 0.3497, "step": 2054 }, { "epoch": 0.0036454579384899897, "grad_norm": 0.49609375, "learning_rate": 0.0019994524364889384, "loss": 0.3626, "step": 2056 }, { "epoch": 0.0036490041037998047, "grad_norm": 0.296875, "learning_rate": 0.0019994503586624555, "loss": 0.2683, "step": 2058 }, { "epoch": 0.00365255026910962, "grad_norm": 2.125, "learning_rate": 0.0019994482769022984, "loss": 0.3466, "step": 2060 }, { "epoch": 0.003656096434419435, "grad_norm": 0.353515625, "learning_rate": 0.001999446191208477, "loss": 0.3426, "step": 2062 }, { "epoch": 0.0036596425997292505, "grad_norm": 0.375, "learning_rate": 0.0019994441015810014, "loss": 0.3127, "step": 2064 }, { "epoch": 0.0036631887650390655, "grad_norm": 0.302734375, "learning_rate": 0.0019994420080198787, "loss": 0.2981, "step": 2066 }, { "epoch": 0.0036667349303488805, "grad_norm": 0.41796875, "learning_rate": 0.0019994399105251197, "loss": 0.3194, "step": 2068 }, { "epoch": 0.003670281095658696, "grad_norm": 0.486328125, "learning_rate": 0.0019994378090967325, "loss": 0.3509, "step": 2070 }, { "epoch": 0.003673827260968511, "grad_norm": 1.375, "learning_rate": 0.001999435703734727, "loss": 0.4092, "step": 2072 }, { "epoch": 0.0036773734262783263, "grad_norm": 0.625, "learning_rate": 0.0019994335944391126, "loss": 0.4644, "step": 2074 }, { "epoch": 0.0036809195915881413, "grad_norm": 0.21875, "learning_rate": 0.0019994314812098977, "loss": 0.2814, "step": 2076 }, { "epoch": 0.0036844657568979563, "grad_norm": 1.171875, "learning_rate": 0.001999429364047092, "loss": 0.3216, "step": 2078 }, { "epoch": 0.0036880119222077717, "grad_norm": 7.21875, "learning_rate": 0.001999427242950705, "loss": 0.3708, "step": 2080 }, { "epoch": 0.0036915580875175867, "grad_norm": 0.58203125, "learning_rate": 0.0019994251179207456, "loss": 0.2891, "step": 2082 }, { "epoch": 0.003695104252827402, "grad_norm": 0.5546875, "learning_rate": 0.001999422988957223, "loss": 0.2936, "step": 2084 }, { "epoch": 0.003698650418137217, "grad_norm": 0.4140625, "learning_rate": 0.0019994208560601464, "loss": 0.3614, "step": 2086 }, { "epoch": 0.003702196583447032, "grad_norm": 0.92578125, "learning_rate": 0.0019994187192295255, "loss": 0.3889, "step": 2088 }, { "epoch": 0.0037057427487568475, "grad_norm": 0.6953125, "learning_rate": 0.00199941657846537, "loss": 0.3378, "step": 2090 }, { "epoch": 0.0037092889140666625, "grad_norm": 0.275390625, "learning_rate": 0.0019994144337676886, "loss": 0.3008, "step": 2092 }, { "epoch": 0.003712835079376478, "grad_norm": 0.390625, "learning_rate": 0.001999412285136491, "loss": 0.3034, "step": 2094 }, { "epoch": 0.003716381244686293, "grad_norm": 0.65625, "learning_rate": 0.0019994101325717865, "loss": 0.3028, "step": 2096 }, { "epoch": 0.0037199274099961083, "grad_norm": 2.390625, "learning_rate": 0.0019994079760735847, "loss": 0.4911, "step": 2098 }, { "epoch": 0.0037234735753059233, "grad_norm": 0.515625, "learning_rate": 0.0019994058156418944, "loss": 0.3719, "step": 2100 }, { "epoch": 0.0037270197406157383, "grad_norm": 0.39453125, "learning_rate": 0.001999403651276726, "loss": 0.3539, "step": 2102 }, { "epoch": 0.0037305659059255537, "grad_norm": 0.21875, "learning_rate": 0.001999401482978088, "loss": 0.5215, "step": 2104 }, { "epoch": 0.0037341120712353687, "grad_norm": 0.359375, "learning_rate": 0.0019993993107459904, "loss": 0.3986, "step": 2106 }, { "epoch": 0.003737658236545184, "grad_norm": 1.390625, "learning_rate": 0.001999397134580443, "loss": 0.4519, "step": 2108 }, { "epoch": 0.003741204401854999, "grad_norm": 0.8515625, "learning_rate": 0.0019993949544814546, "loss": 0.4013, "step": 2110 }, { "epoch": 0.003744750567164814, "grad_norm": 0.89453125, "learning_rate": 0.0019993927704490353, "loss": 0.3381, "step": 2112 }, { "epoch": 0.0037482967324746295, "grad_norm": 1.3671875, "learning_rate": 0.0019993905824831943, "loss": 0.3385, "step": 2114 }, { "epoch": 0.0037518428977844445, "grad_norm": 2.703125, "learning_rate": 0.0019993883905839414, "loss": 0.4074, "step": 2116 }, { "epoch": 0.00375538906309426, "grad_norm": 0.5703125, "learning_rate": 0.001999386194751286, "loss": 0.3746, "step": 2118 }, { "epoch": 0.003758935228404075, "grad_norm": 0.46875, "learning_rate": 0.0019993839949852383, "loss": 0.3088, "step": 2120 }, { "epoch": 0.00376248139371389, "grad_norm": 1.0625, "learning_rate": 0.0019993817912858066, "loss": 0.4044, "step": 2122 }, { "epoch": 0.0037660275590237053, "grad_norm": 0.546875, "learning_rate": 0.001999379583653002, "loss": 0.3365, "step": 2124 }, { "epoch": 0.0037695737243335203, "grad_norm": 0.640625, "learning_rate": 0.0019993773720868332, "loss": 0.3101, "step": 2126 }, { "epoch": 0.0037731198896433357, "grad_norm": 0.4609375, "learning_rate": 0.0019993751565873107, "loss": 0.3699, "step": 2128 }, { "epoch": 0.0037766660549531507, "grad_norm": 0.376953125, "learning_rate": 0.001999372937154443, "loss": 0.3317, "step": 2130 }, { "epoch": 0.003780212220262966, "grad_norm": 0.80859375, "learning_rate": 0.001999370713788241, "loss": 0.3458, "step": 2132 }, { "epoch": 0.003783758385572781, "grad_norm": 0.34375, "learning_rate": 0.001999368486488714, "loss": 0.4345, "step": 2134 }, { "epoch": 0.003787304550882596, "grad_norm": 0.546875, "learning_rate": 0.001999366255255871, "loss": 0.2812, "step": 2136 }, { "epoch": 0.0037908507161924115, "grad_norm": 0.35546875, "learning_rate": 0.001999364020089723, "loss": 0.2801, "step": 2138 }, { "epoch": 0.0037943968815022265, "grad_norm": 0.3125, "learning_rate": 0.0019993617809902792, "loss": 0.2712, "step": 2140 }, { "epoch": 0.003797943046812042, "grad_norm": 0.33203125, "learning_rate": 0.0019993595379575488, "loss": 0.2721, "step": 2142 }, { "epoch": 0.003801489212121857, "grad_norm": 0.28515625, "learning_rate": 0.0019993572909915427, "loss": 0.2625, "step": 2144 }, { "epoch": 0.003805035377431672, "grad_norm": 0.392578125, "learning_rate": 0.00199935504009227, "loss": 0.4411, "step": 2146 }, { "epoch": 0.0038085815427414873, "grad_norm": 0.2890625, "learning_rate": 0.001999352785259741, "loss": 0.3142, "step": 2148 }, { "epoch": 0.0038121277080513023, "grad_norm": 0.7265625, "learning_rate": 0.001999350526493965, "loss": 0.3174, "step": 2150 }, { "epoch": 0.0038156738733611177, "grad_norm": 1.1484375, "learning_rate": 0.0019993482637949526, "loss": 0.419, "step": 2152 }, { "epoch": 0.0038192200386709327, "grad_norm": 0.400390625, "learning_rate": 0.001999345997162713, "loss": 0.3591, "step": 2154 }, { "epoch": 0.0038227662039807477, "grad_norm": 0.3203125, "learning_rate": 0.0019993437265972565, "loss": 0.2762, "step": 2156 }, { "epoch": 0.003826312369290563, "grad_norm": 0.75, "learning_rate": 0.001999341452098593, "loss": 0.3903, "step": 2158 }, { "epoch": 0.003829858534600378, "grad_norm": 0.8984375, "learning_rate": 0.001999339173666732, "loss": 0.3826, "step": 2160 }, { "epoch": 0.0038334046999101935, "grad_norm": 0.22265625, "learning_rate": 0.0019993368913016844, "loss": 0.2625, "step": 2162 }, { "epoch": 0.0038369508652200085, "grad_norm": 0.498046875, "learning_rate": 0.0019993346050034594, "loss": 0.3452, "step": 2164 }, { "epoch": 0.003840497030529824, "grad_norm": 0.4375, "learning_rate": 0.0019993323147720673, "loss": 0.3819, "step": 2166 }, { "epoch": 0.003844043195839639, "grad_norm": 0.9375, "learning_rate": 0.001999330020607518, "loss": 0.2753, "step": 2168 }, { "epoch": 0.003847589361149454, "grad_norm": 1.2578125, "learning_rate": 0.0019993277225098215, "loss": 0.4099, "step": 2170 }, { "epoch": 0.0038511355264592693, "grad_norm": 0.3828125, "learning_rate": 0.001999325420478988, "loss": 0.4247, "step": 2172 }, { "epoch": 0.0038546816917690843, "grad_norm": 0.48828125, "learning_rate": 0.0019993231145150276, "loss": 0.3804, "step": 2174 }, { "epoch": 0.0038582278570788997, "grad_norm": 1.6875, "learning_rate": 0.00199932080461795, "loss": 0.4339, "step": 2176 }, { "epoch": 0.0038617740223887147, "grad_norm": 0.451171875, "learning_rate": 0.001999318490787766, "loss": 0.33, "step": 2178 }, { "epoch": 0.0038653201876985297, "grad_norm": 0.2470703125, "learning_rate": 0.001999316173024485, "loss": 0.3843, "step": 2180 }, { "epoch": 0.003868866353008345, "grad_norm": 0.5, "learning_rate": 0.0019993138513281173, "loss": 0.2957, "step": 2182 }, { "epoch": 0.00387241251831816, "grad_norm": 0.2392578125, "learning_rate": 0.0019993115256986735, "loss": 0.345, "step": 2184 }, { "epoch": 0.0038759586836279755, "grad_norm": 0.3125, "learning_rate": 0.001999309196136163, "loss": 0.2717, "step": 2186 }, { "epoch": 0.0038795048489377905, "grad_norm": 0.55859375, "learning_rate": 0.001999306862640597, "loss": 0.2542, "step": 2188 }, { "epoch": 0.0038830510142476055, "grad_norm": 1.1328125, "learning_rate": 0.0019993045252119854, "loss": 0.3122, "step": 2190 }, { "epoch": 0.003886597179557421, "grad_norm": 3.46875, "learning_rate": 0.0019993021838503374, "loss": 0.4333, "step": 2192 }, { "epoch": 0.003890143344867236, "grad_norm": 0.64453125, "learning_rate": 0.0019992998385556645, "loss": 0.3802, "step": 2194 }, { "epoch": 0.0038936895101770513, "grad_norm": 1.421875, "learning_rate": 0.0019992974893279767, "loss": 0.3688, "step": 2196 }, { "epoch": 0.0038972356754868663, "grad_norm": 0.35546875, "learning_rate": 0.001999295136167284, "loss": 0.3194, "step": 2198 }, { "epoch": 0.0039007818407966817, "grad_norm": 0.337890625, "learning_rate": 0.001999292779073596, "loss": 0.266, "step": 2200 }, { "epoch": 0.0039043280061064967, "grad_norm": 1.0078125, "learning_rate": 0.0019992904180469243, "loss": 0.3561, "step": 2202 }, { "epoch": 0.003907874171416312, "grad_norm": 0.3671875, "learning_rate": 0.0019992880530872786, "loss": 0.3423, "step": 2204 }, { "epoch": 0.003911420336726127, "grad_norm": 0.52734375, "learning_rate": 0.001999285684194669, "loss": 0.2538, "step": 2206 }, { "epoch": 0.003914966502035942, "grad_norm": 0.486328125, "learning_rate": 0.0019992833113691073, "loss": 0.3364, "step": 2208 }, { "epoch": 0.0039185126673457575, "grad_norm": 0.546875, "learning_rate": 0.001999280934610602, "loss": 0.3101, "step": 2210 }, { "epoch": 0.003922058832655572, "grad_norm": 0.4765625, "learning_rate": 0.001999278553919164, "loss": 0.337, "step": 2212 }, { "epoch": 0.0039256049979653875, "grad_norm": 1.1796875, "learning_rate": 0.0019992761692948042, "loss": 0.34, "step": 2214 }, { "epoch": 0.003929151163275203, "grad_norm": 0.5078125, "learning_rate": 0.001999273780737533, "loss": 0.3095, "step": 2216 }, { "epoch": 0.003932697328585018, "grad_norm": 0.5078125, "learning_rate": 0.0019992713882473604, "loss": 0.4177, "step": 2218 }, { "epoch": 0.003936243493894833, "grad_norm": 0.470703125, "learning_rate": 0.0019992689918242974, "loss": 0.3188, "step": 2220 }, { "epoch": 0.003939789659204648, "grad_norm": 0.6171875, "learning_rate": 0.001999266591468354, "loss": 0.327, "step": 2222 }, { "epoch": 0.003943335824514464, "grad_norm": 0.314453125, "learning_rate": 0.0019992641871795407, "loss": 0.3127, "step": 2224 }, { "epoch": 0.003946881989824278, "grad_norm": 0.890625, "learning_rate": 0.0019992617789578683, "loss": 0.274, "step": 2226 }, { "epoch": 0.003950428155134094, "grad_norm": 3.5625, "learning_rate": 0.001999259366803347, "loss": 0.3542, "step": 2228 }, { "epoch": 0.003953974320443909, "grad_norm": 1.6640625, "learning_rate": 0.001999256950715988, "loss": 0.4139, "step": 2230 }, { "epoch": 0.0039575204857537245, "grad_norm": 0.267578125, "learning_rate": 0.0019992545306958013, "loss": 0.3966, "step": 2232 }, { "epoch": 0.003961066651063539, "grad_norm": 0.98046875, "learning_rate": 0.0019992521067427977, "loss": 0.2843, "step": 2234 }, { "epoch": 0.0039646128163733545, "grad_norm": 1.046875, "learning_rate": 0.001999249678856988, "loss": 0.3295, "step": 2236 }, { "epoch": 0.00396815898168317, "grad_norm": 0.375, "learning_rate": 0.001999247247038382, "loss": 0.3355, "step": 2238 }, { "epoch": 0.0039717051469929845, "grad_norm": 0.302734375, "learning_rate": 0.001999244811286991, "loss": 0.2689, "step": 2240 }, { "epoch": 0.0039752513123028, "grad_norm": 0.99609375, "learning_rate": 0.0019992423716028262, "loss": 0.285, "step": 2242 }, { "epoch": 0.003978797477612615, "grad_norm": 0.30859375, "learning_rate": 0.0019992399279858968, "loss": 0.3874, "step": 2244 }, { "epoch": 0.00398234364292243, "grad_norm": 0.64453125, "learning_rate": 0.0019992374804362143, "loss": 0.624, "step": 2246 }, { "epoch": 0.003985889808232245, "grad_norm": 0.30859375, "learning_rate": 0.0019992350289537897, "loss": 0.3005, "step": 2248 }, { "epoch": 0.003989435973542061, "grad_norm": 1.4375, "learning_rate": 0.0019992325735386338, "loss": 0.5908, "step": 2250 }, { "epoch": 0.003992982138851876, "grad_norm": 0.6953125, "learning_rate": 0.001999230114190756, "loss": 0.4056, "step": 2252 }, { "epoch": 0.003996528304161691, "grad_norm": 1.0234375, "learning_rate": 0.0019992276509101688, "loss": 0.3623, "step": 2254 }, { "epoch": 0.004000074469471506, "grad_norm": 1.375, "learning_rate": 0.001999225183696882, "loss": 0.2732, "step": 2256 }, { "epoch": 0.0040036206347813215, "grad_norm": 0.2890625, "learning_rate": 0.0019992227125509065, "loss": 0.2714, "step": 2258 }, { "epoch": 0.004007166800091136, "grad_norm": 0.2734375, "learning_rate": 0.001999220237472253, "loss": 0.3026, "step": 2260 }, { "epoch": 0.0040107129654009515, "grad_norm": 2.21875, "learning_rate": 0.001999217758460933, "loss": 0.6627, "step": 2262 }, { "epoch": 0.004014259130710767, "grad_norm": 0.4609375, "learning_rate": 0.0019992152755169564, "loss": 0.2845, "step": 2264 }, { "epoch": 0.004017805296020582, "grad_norm": 0.291015625, "learning_rate": 0.0019992127886403347, "loss": 0.2716, "step": 2266 }, { "epoch": 0.004021351461330397, "grad_norm": 1.4375, "learning_rate": 0.0019992102978310788, "loss": 0.3575, "step": 2268 }, { "epoch": 0.004024897626640212, "grad_norm": 1.109375, "learning_rate": 0.001999207803089199, "loss": 0.4475, "step": 2270 }, { "epoch": 0.004028443791950028, "grad_norm": 0.734375, "learning_rate": 0.0019992053044147065, "loss": 0.5932, "step": 2272 }, { "epoch": 0.004031989957259842, "grad_norm": 1.03125, "learning_rate": 0.0019992028018076128, "loss": 0.4675, "step": 2274 }, { "epoch": 0.004035536122569658, "grad_norm": 0.6953125, "learning_rate": 0.001999200295267928, "loss": 0.3746, "step": 2276 }, { "epoch": 0.004039082287879473, "grad_norm": 0.431640625, "learning_rate": 0.0019991977847956637, "loss": 0.3107, "step": 2278 }, { "epoch": 0.004042628453189288, "grad_norm": 0.546875, "learning_rate": 0.0019991952703908304, "loss": 0.2674, "step": 2280 }, { "epoch": 0.004046174618499103, "grad_norm": 0.83203125, "learning_rate": 0.0019991927520534394, "loss": 0.3235, "step": 2282 }, { "epoch": 0.0040497207838089185, "grad_norm": 1.2109375, "learning_rate": 0.0019991902297835018, "loss": 0.3161, "step": 2284 }, { "epoch": 0.004053266949118734, "grad_norm": 0.369140625, "learning_rate": 0.001999187703581028, "loss": 0.3673, "step": 2286 }, { "epoch": 0.0040568131144285485, "grad_norm": 1.453125, "learning_rate": 0.00199918517344603, "loss": 0.2897, "step": 2288 }, { "epoch": 0.004060359279738364, "grad_norm": 3.078125, "learning_rate": 0.001999182639378518, "loss": 0.4174, "step": 2290 }, { "epoch": 0.004063905445048179, "grad_norm": 0.322265625, "learning_rate": 0.0019991801013785034, "loss": 0.296, "step": 2292 }, { "epoch": 0.004067451610357994, "grad_norm": 0.431640625, "learning_rate": 0.0019991775594459973, "loss": 0.307, "step": 2294 }, { "epoch": 0.004070997775667809, "grad_norm": 2.171875, "learning_rate": 0.0019991750135810115, "loss": 0.4551, "step": 2296 }, { "epoch": 0.004074543940977625, "grad_norm": 0.9921875, "learning_rate": 0.001999172463783556, "loss": 0.4069, "step": 2298 }, { "epoch": 0.00407809010628744, "grad_norm": 0.79296875, "learning_rate": 0.001999169910053642, "loss": 0.3585, "step": 2300 }, { "epoch": 0.004081636271597255, "grad_norm": 0.5703125, "learning_rate": 0.001999167352391282, "loss": 0.2759, "step": 2302 }, { "epoch": 0.00408518243690707, "grad_norm": 0.7421875, "learning_rate": 0.0019991647907964855, "loss": 0.2841, "step": 2304 }, { "epoch": 0.0040887286022168855, "grad_norm": 0.53515625, "learning_rate": 0.001999162225269265, "loss": 0.4041, "step": 2306 }, { "epoch": 0.0040922747675267, "grad_norm": 0.69921875, "learning_rate": 0.0019991596558096313, "loss": 0.5127, "step": 2308 }, { "epoch": 0.0040958209328365155, "grad_norm": 2.125, "learning_rate": 0.0019991570824175954, "loss": 0.414, "step": 2310 }, { "epoch": 0.004099367098146331, "grad_norm": 0.5, "learning_rate": 0.0019991545050931686, "loss": 0.3223, "step": 2312 }, { "epoch": 0.0041029132634561455, "grad_norm": 0.6953125, "learning_rate": 0.0019991519238363622, "loss": 0.3266, "step": 2314 }, { "epoch": 0.004106459428765961, "grad_norm": 0.82421875, "learning_rate": 0.001999149338647188, "loss": 0.4117, "step": 2316 }, { "epoch": 0.004110005594075776, "grad_norm": 0.44921875, "learning_rate": 0.001999146749525656, "loss": 0.2709, "step": 2318 }, { "epoch": 0.004113551759385592, "grad_norm": 0.3125, "learning_rate": 0.0019991441564717796, "loss": 0.3172, "step": 2320 }, { "epoch": 0.004117097924695406, "grad_norm": 0.7109375, "learning_rate": 0.0019991415594855684, "loss": 0.2935, "step": 2322 }, { "epoch": 0.004120644090005222, "grad_norm": 0.470703125, "learning_rate": 0.001999138958567034, "loss": 0.3147, "step": 2324 }, { "epoch": 0.004124190255315037, "grad_norm": 1.0546875, "learning_rate": 0.0019991363537161886, "loss": 0.2743, "step": 2326 }, { "epoch": 0.004127736420624852, "grad_norm": 0.3828125, "learning_rate": 0.001999133744933043, "loss": 0.2952, "step": 2328 }, { "epoch": 0.004131282585934667, "grad_norm": 0.3828125, "learning_rate": 0.0019991311322176083, "loss": 0.2564, "step": 2330 }, { "epoch": 0.0041348287512444825, "grad_norm": 2.125, "learning_rate": 0.0019991285155698964, "loss": 0.309, "step": 2332 }, { "epoch": 0.004138374916554298, "grad_norm": 0.26953125, "learning_rate": 0.0019991258949899186, "loss": 0.2408, "step": 2334 }, { "epoch": 0.0041419210818641125, "grad_norm": 0.578125, "learning_rate": 0.0019991232704776865, "loss": 0.2607, "step": 2336 }, { "epoch": 0.004145467247173928, "grad_norm": 0.69140625, "learning_rate": 0.001999120642033212, "loss": 0.3421, "step": 2338 }, { "epoch": 0.004149013412483743, "grad_norm": 0.3515625, "learning_rate": 0.0019991180096565054, "loss": 0.4084, "step": 2340 }, { "epoch": 0.004152559577793558, "grad_norm": 0.3515625, "learning_rate": 0.001999115373347579, "loss": 0.2662, "step": 2342 }, { "epoch": 0.004156105743103373, "grad_norm": 0.25, "learning_rate": 0.0019991127331064444, "loss": 0.267, "step": 2344 }, { "epoch": 0.004159651908413189, "grad_norm": 0.47265625, "learning_rate": 0.001999110088933113, "loss": 0.2545, "step": 2346 }, { "epoch": 0.004163198073723003, "grad_norm": 3.28125, "learning_rate": 0.001999107440827596, "loss": 0.358, "step": 2348 }, { "epoch": 0.004166744239032819, "grad_norm": 0.3125, "learning_rate": 0.0019991047887899056, "loss": 0.2937, "step": 2350 }, { "epoch": 0.004170290404342634, "grad_norm": 0.490234375, "learning_rate": 0.0019991021328200536, "loss": 0.2853, "step": 2352 }, { "epoch": 0.0041738365696524496, "grad_norm": 0.55078125, "learning_rate": 0.0019990994729180505, "loss": 0.3396, "step": 2354 }, { "epoch": 0.004177382734962264, "grad_norm": 0.4453125, "learning_rate": 0.0019990968090839085, "loss": 0.4468, "step": 2356 }, { "epoch": 0.0041809289002720795, "grad_norm": 0.427734375, "learning_rate": 0.0019990941413176398, "loss": 0.3123, "step": 2358 }, { "epoch": 0.004184475065581895, "grad_norm": 0.36328125, "learning_rate": 0.001999091469619255, "loss": 0.5217, "step": 2360 }, { "epoch": 0.0041880212308917095, "grad_norm": 0.384765625, "learning_rate": 0.0019990887939887667, "loss": 0.3404, "step": 2362 }, { "epoch": 0.004191567396201525, "grad_norm": 0.33984375, "learning_rate": 0.001999086114426186, "loss": 0.3254, "step": 2364 }, { "epoch": 0.00419511356151134, "grad_norm": 3.875, "learning_rate": 0.0019990834309315253, "loss": 0.5084, "step": 2366 }, { "epoch": 0.004198659726821156, "grad_norm": 0.671875, "learning_rate": 0.001999080743504796, "loss": 0.332, "step": 2368 }, { "epoch": 0.00420220589213097, "grad_norm": 0.6328125, "learning_rate": 0.001999078052146009, "loss": 0.3747, "step": 2370 }, { "epoch": 0.004205752057440786, "grad_norm": 0.7265625, "learning_rate": 0.0019990753568551777, "loss": 0.3267, "step": 2372 }, { "epoch": 0.004209298222750601, "grad_norm": 0.392578125, "learning_rate": 0.0019990726576323125, "loss": 0.3301, "step": 2374 }, { "epoch": 0.004212844388060416, "grad_norm": 0.296875, "learning_rate": 0.001999069954477426, "loss": 0.3264, "step": 2376 }, { "epoch": 0.004216390553370231, "grad_norm": 0.310546875, "learning_rate": 0.0019990672473905297, "loss": 0.2649, "step": 2378 }, { "epoch": 0.0042199367186800466, "grad_norm": 0.6328125, "learning_rate": 0.0019990645363716355, "loss": 0.3235, "step": 2380 }, { "epoch": 0.004223482883989861, "grad_norm": 0.58984375, "learning_rate": 0.001999061821420755, "loss": 0.348, "step": 2382 }, { "epoch": 0.0042270290492996765, "grad_norm": 0.734375, "learning_rate": 0.001999059102537901, "loss": 0.3561, "step": 2384 }, { "epoch": 0.004230575214609492, "grad_norm": 0.298828125, "learning_rate": 0.001999056379723084, "loss": 0.2547, "step": 2386 }, { "epoch": 0.004234121379919307, "grad_norm": 0.546875, "learning_rate": 0.001999053652976317, "loss": 0.326, "step": 2388 }, { "epoch": 0.004237667545229122, "grad_norm": 0.8203125, "learning_rate": 0.0019990509222976116, "loss": 0.37, "step": 2390 }, { "epoch": 0.004241213710538937, "grad_norm": 3.0625, "learning_rate": 0.0019990481876869795, "loss": 0.3745, "step": 2392 }, { "epoch": 0.004244759875848753, "grad_norm": 0.56640625, "learning_rate": 0.001999045449144433, "loss": 0.3181, "step": 2394 }, { "epoch": 0.004248306041158567, "grad_norm": 0.482421875, "learning_rate": 0.0019990427066699837, "loss": 0.2918, "step": 2396 }, { "epoch": 0.004251852206468383, "grad_norm": 0.55078125, "learning_rate": 0.001999039960263644, "loss": 0.2809, "step": 2398 }, { "epoch": 0.004255398371778198, "grad_norm": 0.43359375, "learning_rate": 0.0019990372099254255, "loss": 0.3693, "step": 2400 }, { "epoch": 0.004258944537088014, "grad_norm": 1.0078125, "learning_rate": 0.001999034455655341, "loss": 0.3528, "step": 2402 }, { "epoch": 0.004262490702397828, "grad_norm": 0.2294921875, "learning_rate": 0.0019990316974534015, "loss": 0.2559, "step": 2404 }, { "epoch": 0.0042660368677076435, "grad_norm": 0.267578125, "learning_rate": 0.00199902893531962, "loss": 0.245, "step": 2406 }, { "epoch": 0.004269583033017459, "grad_norm": 0.3515625, "learning_rate": 0.001999026169254008, "loss": 0.343, "step": 2408 }, { "epoch": 0.0042731291983272735, "grad_norm": 0.2890625, "learning_rate": 0.0019990233992565774, "loss": 0.2537, "step": 2410 }, { "epoch": 0.004276675363637089, "grad_norm": 0.44140625, "learning_rate": 0.001999020625327341, "loss": 0.2417, "step": 2412 }, { "epoch": 0.004280221528946904, "grad_norm": 2.484375, "learning_rate": 0.0019990178474663107, "loss": 0.4874, "step": 2414 }, { "epoch": 0.004283767694256719, "grad_norm": 0.5234375, "learning_rate": 0.0019990150656734986, "loss": 0.2428, "step": 2416 }, { "epoch": 0.004287313859566534, "grad_norm": 0.337890625, "learning_rate": 0.001999012279948916, "loss": 0.2781, "step": 2418 }, { "epoch": 0.00429086002487635, "grad_norm": 0.298828125, "learning_rate": 0.001999009490292577, "loss": 0.3514, "step": 2420 }, { "epoch": 0.004294406190186165, "grad_norm": 0.3671875, "learning_rate": 0.001999006696704492, "loss": 0.3882, "step": 2422 }, { "epoch": 0.00429795235549598, "grad_norm": 2.59375, "learning_rate": 0.0019990038991846743, "loss": 0.3699, "step": 2424 }, { "epoch": 0.004301498520805795, "grad_norm": 0.267578125, "learning_rate": 0.0019990010977331356, "loss": 0.3473, "step": 2426 }, { "epoch": 0.0043050446861156106, "grad_norm": 0.5, "learning_rate": 0.0019989982923498883, "loss": 0.2783, "step": 2428 }, { "epoch": 0.004308590851425425, "grad_norm": 2.34375, "learning_rate": 0.0019989954830349444, "loss": 0.4462, "step": 2430 }, { "epoch": 0.0043121370167352405, "grad_norm": 0.439453125, "learning_rate": 0.0019989926697883166, "loss": 0.357, "step": 2432 }, { "epoch": 0.004315683182045056, "grad_norm": 0.333984375, "learning_rate": 0.0019989898526100174, "loss": 0.3523, "step": 2434 }, { "epoch": 0.004319229347354871, "grad_norm": 0.30078125, "learning_rate": 0.0019989870315000585, "loss": 0.2859, "step": 2436 }, { "epoch": 0.004322775512664686, "grad_norm": 0.2412109375, "learning_rate": 0.0019989842064584524, "loss": 0.3051, "step": 2438 }, { "epoch": 0.004326321677974501, "grad_norm": 0.2265625, "learning_rate": 0.0019989813774852123, "loss": 0.2355, "step": 2440 }, { "epoch": 0.004329867843284317, "grad_norm": 0.482421875, "learning_rate": 0.001998978544580349, "loss": 0.4937, "step": 2442 }, { "epoch": 0.004333414008594131, "grad_norm": 0.91015625, "learning_rate": 0.001998975707743876, "loss": 0.4194, "step": 2444 }, { "epoch": 0.004336960173903947, "grad_norm": 0.330078125, "learning_rate": 0.0019989728669758053, "loss": 0.3299, "step": 2446 }, { "epoch": 0.004340506339213762, "grad_norm": 0.41796875, "learning_rate": 0.0019989700222761498, "loss": 0.2959, "step": 2448 }, { "epoch": 0.004344052504523577, "grad_norm": 0.8046875, "learning_rate": 0.0019989671736449213, "loss": 0.345, "step": 2450 }, { "epoch": 0.004347598669833392, "grad_norm": 1.046875, "learning_rate": 0.001998964321082133, "loss": 0.32, "step": 2452 }, { "epoch": 0.0043511448351432076, "grad_norm": 0.4453125, "learning_rate": 0.0019989614645877962, "loss": 0.318, "step": 2454 }, { "epoch": 0.004354691000453023, "grad_norm": 2.46875, "learning_rate": 0.001998958604161925, "loss": 0.3437, "step": 2456 }, { "epoch": 0.0043582371657628375, "grad_norm": 0.365234375, "learning_rate": 0.0019989557398045303, "loss": 0.3221, "step": 2458 }, { "epoch": 0.004361783331072653, "grad_norm": 2.65625, "learning_rate": 0.0019989528715156257, "loss": 0.4671, "step": 2460 }, { "epoch": 0.004365329496382468, "grad_norm": 2.015625, "learning_rate": 0.0019989499992952233, "loss": 0.4874, "step": 2462 }, { "epoch": 0.004368875661692283, "grad_norm": 0.486328125, "learning_rate": 0.001998947123143336, "loss": 0.3161, "step": 2464 }, { "epoch": 0.004372421827002098, "grad_norm": 0.91015625, "learning_rate": 0.001998944243059976, "loss": 0.5244, "step": 2466 }, { "epoch": 0.004375967992311914, "grad_norm": 0.63671875, "learning_rate": 0.0019989413590451558, "loss": 0.5607, "step": 2468 }, { "epoch": 0.004379514157621729, "grad_norm": 0.41015625, "learning_rate": 0.0019989384710988886, "loss": 0.2798, "step": 2470 }, { "epoch": 0.004383060322931544, "grad_norm": 0.51953125, "learning_rate": 0.001998935579221187, "loss": 0.3108, "step": 2472 }, { "epoch": 0.004386606488241359, "grad_norm": 2.90625, "learning_rate": 0.0019989326834120628, "loss": 0.4146, "step": 2474 }, { "epoch": 0.004390152653551175, "grad_norm": 0.41015625, "learning_rate": 0.001998929783671529, "loss": 0.3229, "step": 2476 }, { "epoch": 0.004393698818860989, "grad_norm": 0.431640625, "learning_rate": 0.001998926879999599, "loss": 0.3949, "step": 2478 }, { "epoch": 0.0043972449841708046, "grad_norm": 2.015625, "learning_rate": 0.0019989239723962847, "loss": 0.4253, "step": 2480 }, { "epoch": 0.00440079114948062, "grad_norm": 2.0, "learning_rate": 0.0019989210608615995, "loss": 0.503, "step": 2482 }, { "epoch": 0.0044043373147904345, "grad_norm": 1.8828125, "learning_rate": 0.0019989181453955555, "loss": 0.3762, "step": 2484 }, { "epoch": 0.00440788348010025, "grad_norm": 2.046875, "learning_rate": 0.001998915225998166, "loss": 0.4607, "step": 2486 }, { "epoch": 0.004411429645410065, "grad_norm": 0.314453125, "learning_rate": 0.0019989123026694427, "loss": 0.3172, "step": 2488 }, { "epoch": 0.004414975810719881, "grad_norm": 0.4375, "learning_rate": 0.0019989093754094, "loss": 0.2745, "step": 2490 }, { "epoch": 0.004418521976029695, "grad_norm": 0.2734375, "learning_rate": 0.001998906444218049, "loss": 0.3414, "step": 2492 }, { "epoch": 0.004422068141339511, "grad_norm": 0.71484375, "learning_rate": 0.001998903509095404, "loss": 0.434, "step": 2494 }, { "epoch": 0.004425614306649326, "grad_norm": 0.326171875, "learning_rate": 0.0019989005700414768, "loss": 0.4045, "step": 2496 }, { "epoch": 0.004429160471959141, "grad_norm": 0.59375, "learning_rate": 0.0019988976270562805, "loss": 0.2992, "step": 2498 }, { "epoch": 0.004432706637268956, "grad_norm": 1.546875, "learning_rate": 0.0019988946801398288, "loss": 0.4373, "step": 2500 }, { "epoch": 0.004436252802578772, "grad_norm": 0.5078125, "learning_rate": 0.0019988917292921332, "loss": 0.3932, "step": 2502 }, { "epoch": 0.004439798967888587, "grad_norm": 0.34765625, "learning_rate": 0.001998888774513208, "loss": 0.336, "step": 2504 }, { "epoch": 0.0044433451331984015, "grad_norm": 0.62890625, "learning_rate": 0.001998885815803065, "loss": 0.3264, "step": 2506 }, { "epoch": 0.004446891298508217, "grad_norm": 1.3984375, "learning_rate": 0.0019988828531617175, "loss": 0.3519, "step": 2508 }, { "epoch": 0.004450437463818032, "grad_norm": 1.125, "learning_rate": 0.0019988798865891787, "loss": 0.2821, "step": 2510 }, { "epoch": 0.004453983629127847, "grad_norm": 1.3828125, "learning_rate": 0.0019988769160854615, "loss": 0.3554, "step": 2512 }, { "epoch": 0.004457529794437662, "grad_norm": 1.0859375, "learning_rate": 0.0019988739416505787, "loss": 0.4213, "step": 2514 }, { "epoch": 0.004461075959747478, "grad_norm": 3.515625, "learning_rate": 0.0019988709632845435, "loss": 0.5323, "step": 2516 }, { "epoch": 0.004464622125057292, "grad_norm": 0.828125, "learning_rate": 0.0019988679809873687, "loss": 0.3411, "step": 2518 }, { "epoch": 0.004468168290367108, "grad_norm": 0.36328125, "learning_rate": 0.001998864994759067, "loss": 0.4354, "step": 2520 }, { "epoch": 0.004471714455676923, "grad_norm": 0.36328125, "learning_rate": 0.001998862004599653, "loss": 0.2528, "step": 2522 }, { "epoch": 0.004475260620986739, "grad_norm": 0.8125, "learning_rate": 0.0019988590105091382, "loss": 0.3286, "step": 2524 }, { "epoch": 0.004478806786296553, "grad_norm": 0.380859375, "learning_rate": 0.001998856012487536, "loss": 0.8415, "step": 2526 }, { "epoch": 0.004482352951606369, "grad_norm": 1.6171875, "learning_rate": 0.00199885301053486, "loss": 0.3309, "step": 2528 }, { "epoch": 0.004485899116916184, "grad_norm": 1.109375, "learning_rate": 0.0019988500046511227, "loss": 0.3953, "step": 2530 }, { "epoch": 0.0044894452822259985, "grad_norm": 0.44921875, "learning_rate": 0.0019988469948363377, "loss": 0.3209, "step": 2532 }, { "epoch": 0.004492991447535814, "grad_norm": 0.58984375, "learning_rate": 0.0019988439810905184, "loss": 0.3589, "step": 2534 }, { "epoch": 0.004496537612845629, "grad_norm": 0.498046875, "learning_rate": 0.001998840963413678, "loss": 0.3395, "step": 2536 }, { "epoch": 0.004500083778155445, "grad_norm": 0.4609375, "learning_rate": 0.001998837941805828, "loss": 0.2942, "step": 2538 }, { "epoch": 0.004503629943465259, "grad_norm": 0.373046875, "learning_rate": 0.001998834916266984, "loss": 0.3399, "step": 2540 }, { "epoch": 0.004507176108775075, "grad_norm": 2.40625, "learning_rate": 0.001998831886797158, "loss": 0.3492, "step": 2542 }, { "epoch": 0.00451072227408489, "grad_norm": 0.2734375, "learning_rate": 0.0019988288533963634, "loss": 0.2839, "step": 2544 }, { "epoch": 0.004514268439394705, "grad_norm": 0.37109375, "learning_rate": 0.001998825816064613, "loss": 0.2991, "step": 2546 }, { "epoch": 0.00451781460470452, "grad_norm": 0.1953125, "learning_rate": 0.0019988227748019213, "loss": 0.2732, "step": 2548 }, { "epoch": 0.004521360770014336, "grad_norm": 0.96484375, "learning_rate": 0.0019988197296083, "loss": 0.411, "step": 2550 }, { "epoch": 0.00452490693532415, "grad_norm": 0.51953125, "learning_rate": 0.0019988166804837644, "loss": 0.5054, "step": 2552 }, { "epoch": 0.0045284531006339656, "grad_norm": 0.35546875, "learning_rate": 0.001998813627428326, "loss": 0.2754, "step": 2554 }, { "epoch": 0.004531999265943781, "grad_norm": 0.2109375, "learning_rate": 0.0019988105704419994, "loss": 0.271, "step": 2556 }, { "epoch": 0.004535545431253596, "grad_norm": 0.625, "learning_rate": 0.001998807509524797, "loss": 0.3684, "step": 2558 }, { "epoch": 0.004539091596563411, "grad_norm": 0.49609375, "learning_rate": 0.001998804444676733, "loss": 0.3516, "step": 2560 }, { "epoch": 0.004542637761873226, "grad_norm": 0.40625, "learning_rate": 0.0019988013758978204, "loss": 0.3089, "step": 2562 }, { "epoch": 0.004546183927183042, "grad_norm": 0.447265625, "learning_rate": 0.0019987983031880723, "loss": 0.3493, "step": 2564 }, { "epoch": 0.004549730092492856, "grad_norm": 0.365234375, "learning_rate": 0.001998795226547503, "loss": 0.2243, "step": 2566 }, { "epoch": 0.004553276257802672, "grad_norm": 4.3125, "learning_rate": 0.001998792145976125, "loss": 0.413, "step": 2568 }, { "epoch": 0.004556822423112487, "grad_norm": 0.60546875, "learning_rate": 0.001998789061473952, "loss": 0.3219, "step": 2570 }, { "epoch": 0.004560368588422303, "grad_norm": 0.388671875, "learning_rate": 0.0019987859730409984, "loss": 0.2752, "step": 2572 }, { "epoch": 0.004563914753732117, "grad_norm": 0.6328125, "learning_rate": 0.001998782880677277, "loss": 0.4357, "step": 2574 }, { "epoch": 0.004567460919041933, "grad_norm": 0.5390625, "learning_rate": 0.001998779784382801, "loss": 0.3183, "step": 2576 }, { "epoch": 0.004571007084351748, "grad_norm": 0.9609375, "learning_rate": 0.001998776684157584, "loss": 0.4608, "step": 2578 }, { "epoch": 0.0045745532496615626, "grad_norm": 0.337890625, "learning_rate": 0.0019987735800016406, "loss": 0.371, "step": 2580 }, { "epoch": 0.004578099414971378, "grad_norm": 1.4453125, "learning_rate": 0.0019987704719149836, "loss": 0.2915, "step": 2582 }, { "epoch": 0.004581645580281193, "grad_norm": 1.0625, "learning_rate": 0.0019987673598976263, "loss": 0.4373, "step": 2584 }, { "epoch": 0.004585191745591008, "grad_norm": 0.265625, "learning_rate": 0.0019987642439495824, "loss": 0.272, "step": 2586 }, { "epoch": 0.004588737910900823, "grad_norm": 1.53125, "learning_rate": 0.001998761124070866, "loss": 0.484, "step": 2588 }, { "epoch": 0.004592284076210639, "grad_norm": 1.390625, "learning_rate": 0.0019987580002614907, "loss": 0.3142, "step": 2590 }, { "epoch": 0.004595830241520454, "grad_norm": 0.453125, "learning_rate": 0.0019987548725214697, "loss": 0.3708, "step": 2592 }, { "epoch": 0.004599376406830269, "grad_norm": 0.376953125, "learning_rate": 0.0019987517408508173, "loss": 0.3678, "step": 2594 }, { "epoch": 0.004602922572140084, "grad_norm": 1.796875, "learning_rate": 0.001998748605249546, "loss": 0.3708, "step": 2596 }, { "epoch": 0.0046064687374499, "grad_norm": 0.54296875, "learning_rate": 0.001998745465717671, "loss": 0.244, "step": 2598 }, { "epoch": 0.004610014902759714, "grad_norm": 1.1796875, "learning_rate": 0.0019987423222552056, "loss": 0.3241, "step": 2600 }, { "epoch": 0.00461356106806953, "grad_norm": 1.5625, "learning_rate": 0.001998739174862163, "loss": 0.3932, "step": 2602 }, { "epoch": 0.004617107233379345, "grad_norm": 0.3984375, "learning_rate": 0.0019987360235385575, "loss": 0.2386, "step": 2604 }, { "epoch": 0.00462065339868916, "grad_norm": 0.5625, "learning_rate": 0.0019987328682844027, "loss": 0.2481, "step": 2606 }, { "epoch": 0.004624199563998975, "grad_norm": 0.26953125, "learning_rate": 0.0019987297090997124, "loss": 0.3161, "step": 2608 }, { "epoch": 0.00462774572930879, "grad_norm": 1.4296875, "learning_rate": 0.0019987265459845, "loss": 0.658, "step": 2610 }, { "epoch": 0.004631291894618606, "grad_norm": 0.671875, "learning_rate": 0.00199872337893878, "loss": 0.3221, "step": 2612 }, { "epoch": 0.00463483805992842, "grad_norm": 0.9609375, "learning_rate": 0.001998720207962566, "loss": 0.4271, "step": 2614 }, { "epoch": 0.004638384225238236, "grad_norm": 0.21484375, "learning_rate": 0.0019987170330558715, "loss": 0.313, "step": 2616 }, { "epoch": 0.004641930390548051, "grad_norm": 1.046875, "learning_rate": 0.001998713854218711, "loss": 0.3507, "step": 2618 }, { "epoch": 0.004645476555857866, "grad_norm": 0.30859375, "learning_rate": 0.001998710671451098, "loss": 0.2538, "step": 2620 }, { "epoch": 0.004649022721167681, "grad_norm": 1.046875, "learning_rate": 0.001998707484753047, "loss": 0.3987, "step": 2622 }, { "epoch": 0.004652568886477497, "grad_norm": 0.54296875, "learning_rate": 0.001998704294124571, "loss": 0.3039, "step": 2624 }, { "epoch": 0.004656115051787312, "grad_norm": 0.36328125, "learning_rate": 0.001998701099565685, "loss": 0.3143, "step": 2626 }, { "epoch": 0.004659661217097127, "grad_norm": 0.34765625, "learning_rate": 0.001998697901076402, "loss": 0.3227, "step": 2628 }, { "epoch": 0.004663207382406942, "grad_norm": 0.365234375, "learning_rate": 0.0019986946986567363, "loss": 0.335, "step": 2630 }, { "epoch": 0.004666753547716757, "grad_norm": 1.6796875, "learning_rate": 0.0019986914923067027, "loss": 0.3679, "step": 2632 }, { "epoch": 0.004670299713026572, "grad_norm": 0.6328125, "learning_rate": 0.0019986882820263135, "loss": 0.3196, "step": 2634 }, { "epoch": 0.004673845878336387, "grad_norm": 0.369140625, "learning_rate": 0.0019986850678155844, "loss": 0.2708, "step": 2636 }, { "epoch": 0.004677392043646203, "grad_norm": 0.4296875, "learning_rate": 0.001998681849674529, "loss": 0.2926, "step": 2638 }, { "epoch": 0.004680938208956018, "grad_norm": 1.7109375, "learning_rate": 0.0019986786276031607, "loss": 0.2916, "step": 2640 }, { "epoch": 0.004684484374265833, "grad_norm": 0.4375, "learning_rate": 0.0019986754016014944, "loss": 0.3816, "step": 2642 }, { "epoch": 0.004688030539575648, "grad_norm": 0.33203125, "learning_rate": 0.001998672171669544, "loss": 0.4013, "step": 2644 }, { "epoch": 0.004691576704885464, "grad_norm": 0.375, "learning_rate": 0.001998668937807323, "loss": 0.3108, "step": 2646 }, { "epoch": 0.004695122870195278, "grad_norm": 0.197265625, "learning_rate": 0.0019986657000148466, "loss": 0.2387, "step": 2648 }, { "epoch": 0.004698669035505094, "grad_norm": 4.40625, "learning_rate": 0.001998662458292128, "loss": 0.3733, "step": 2650 }, { "epoch": 0.004702215200814909, "grad_norm": 0.51953125, "learning_rate": 0.001998659212639182, "loss": 0.4222, "step": 2652 }, { "epoch": 0.0047057613661247236, "grad_norm": 0.75, "learning_rate": 0.001998655963056023, "loss": 0.2702, "step": 2654 }, { "epoch": 0.004709307531434539, "grad_norm": 0.2734375, "learning_rate": 0.0019986527095426647, "loss": 0.2796, "step": 2656 }, { "epoch": 0.004712853696744354, "grad_norm": 1.46875, "learning_rate": 0.0019986494520991215, "loss": 0.3217, "step": 2658 }, { "epoch": 0.00471639986205417, "grad_norm": 1.015625, "learning_rate": 0.001998646190725407, "loss": 0.3625, "step": 2660 }, { "epoch": 0.004719946027363984, "grad_norm": 0.298828125, "learning_rate": 0.0019986429254215364, "loss": 0.2949, "step": 2662 }, { "epoch": 0.0047234921926738, "grad_norm": 0.328125, "learning_rate": 0.0019986396561875236, "loss": 0.2631, "step": 2664 }, { "epoch": 0.004727038357983615, "grad_norm": 2.890625, "learning_rate": 0.0019986363830233832, "loss": 0.4082, "step": 2666 }, { "epoch": 0.00473058452329343, "grad_norm": 0.734375, "learning_rate": 0.001998633105929129, "loss": 0.4736, "step": 2668 }, { "epoch": 0.004734130688603245, "grad_norm": 0.44921875, "learning_rate": 0.0019986298249047756, "loss": 0.329, "step": 2670 }, { "epoch": 0.004737676853913061, "grad_norm": 0.470703125, "learning_rate": 0.0019986265399503374, "loss": 0.4027, "step": 2672 }, { "epoch": 0.004741223019222876, "grad_norm": 0.5625, "learning_rate": 0.0019986232510658284, "loss": 0.2617, "step": 2674 }, { "epoch": 0.004744769184532691, "grad_norm": 0.5078125, "learning_rate": 0.001998619958251264, "loss": 0.3261, "step": 2676 }, { "epoch": 0.004748315349842506, "grad_norm": 1.3515625, "learning_rate": 0.001998616661506657, "loss": 0.396, "step": 2678 }, { "epoch": 0.004751861515152321, "grad_norm": 0.63671875, "learning_rate": 0.0019986133608320233, "loss": 0.3203, "step": 2680 }, { "epoch": 0.004755407680462136, "grad_norm": 1.3515625, "learning_rate": 0.0019986100562273765, "loss": 0.3113, "step": 2682 }, { "epoch": 0.004758953845771951, "grad_norm": 2.625, "learning_rate": 0.0019986067476927315, "loss": 0.5206, "step": 2684 }, { "epoch": 0.004762500011081767, "grad_norm": 1.515625, "learning_rate": 0.0019986034352281025, "loss": 0.4123, "step": 2686 }, { "epoch": 0.004766046176391581, "grad_norm": 0.33984375, "learning_rate": 0.001998600118833504, "loss": 0.2106, "step": 2688 }, { "epoch": 0.004769592341701397, "grad_norm": 0.48828125, "learning_rate": 0.0019985967985089504, "loss": 0.3059, "step": 2690 }, { "epoch": 0.004773138507011212, "grad_norm": 1.921875, "learning_rate": 0.0019985934742544564, "loss": 0.3459, "step": 2692 }, { "epoch": 0.004776684672321028, "grad_norm": 1.921875, "learning_rate": 0.0019985901460700365, "loss": 0.3537, "step": 2694 }, { "epoch": 0.004780230837630842, "grad_norm": 0.51171875, "learning_rate": 0.0019985868139557055, "loss": 0.3089, "step": 2696 }, { "epoch": 0.004783777002940658, "grad_norm": 1.015625, "learning_rate": 0.0019985834779114777, "loss": 0.4324, "step": 2698 }, { "epoch": 0.004787323168250473, "grad_norm": 1.8671875, "learning_rate": 0.0019985801379373675, "loss": 0.3557, "step": 2700 }, { "epoch": 0.004790869333560288, "grad_norm": 0.77734375, "learning_rate": 0.00199857679403339, "loss": 0.3592, "step": 2702 }, { "epoch": 0.004794415498870103, "grad_norm": 0.4921875, "learning_rate": 0.001998573446199559, "loss": 0.3282, "step": 2704 }, { "epoch": 0.004797961664179918, "grad_norm": 0.423828125, "learning_rate": 0.001998570094435891, "loss": 0.5474, "step": 2706 }, { "epoch": 0.004801507829489734, "grad_norm": 0.4375, "learning_rate": 0.0019985667387423978, "loss": 0.3031, "step": 2708 }, { "epoch": 0.004805053994799548, "grad_norm": 0.82421875, "learning_rate": 0.0019985633791190964, "loss": 0.3576, "step": 2710 }, { "epoch": 0.004808600160109364, "grad_norm": 1.328125, "learning_rate": 0.0019985600155660007, "loss": 0.4023, "step": 2712 }, { "epoch": 0.004812146325419179, "grad_norm": 0.98828125, "learning_rate": 0.001998556648083125, "loss": 0.2578, "step": 2714 }, { "epoch": 0.004815692490728994, "grad_norm": 0.4296875, "learning_rate": 0.001998553276670485, "loss": 0.2773, "step": 2716 }, { "epoch": 0.004819238656038809, "grad_norm": 0.31640625, "learning_rate": 0.0019985499013280947, "loss": 0.3085, "step": 2718 }, { "epoch": 0.004822784821348625, "grad_norm": 0.2890625, "learning_rate": 0.001998546522055969, "loss": 0.3116, "step": 2720 }, { "epoch": 0.004826330986658439, "grad_norm": 0.87109375, "learning_rate": 0.0019985431388541233, "loss": 0.4739, "step": 2722 }, { "epoch": 0.004829877151968255, "grad_norm": 0.69140625, "learning_rate": 0.001998539751722571, "loss": 0.3929, "step": 2724 }, { "epoch": 0.00483342331727807, "grad_norm": 1.2890625, "learning_rate": 0.0019985363606613285, "loss": 0.3235, "step": 2726 }, { "epoch": 0.0048369694825878854, "grad_norm": 0.765625, "learning_rate": 0.0019985329656704094, "loss": 0.2474, "step": 2728 }, { "epoch": 0.0048405156478977, "grad_norm": 0.6015625, "learning_rate": 0.001998529566749829, "loss": 0.354, "step": 2730 }, { "epoch": 0.004844061813207515, "grad_norm": 0.5703125, "learning_rate": 0.0019985261638996027, "loss": 0.2822, "step": 2732 }, { "epoch": 0.004847607978517331, "grad_norm": 0.5390625, "learning_rate": 0.0019985227571197445, "loss": 0.3357, "step": 2734 }, { "epoch": 0.004851154143827145, "grad_norm": 0.4296875, "learning_rate": 0.00199851934641027, "loss": 0.3095, "step": 2736 }, { "epoch": 0.004854700309136961, "grad_norm": 1.2578125, "learning_rate": 0.0019985159317711934, "loss": 0.554, "step": 2738 }, { "epoch": 0.004858246474446776, "grad_norm": 0.306640625, "learning_rate": 0.0019985125132025304, "loss": 0.3431, "step": 2740 }, { "epoch": 0.004861792639756591, "grad_norm": 0.6328125, "learning_rate": 0.001998509090704295, "loss": 0.3463, "step": 2742 }, { "epoch": 0.004865338805066406, "grad_norm": 0.302734375, "learning_rate": 0.001998505664276503, "loss": 0.298, "step": 2744 }, { "epoch": 0.004868884970376222, "grad_norm": 0.498046875, "learning_rate": 0.0019985022339191697, "loss": 0.2978, "step": 2746 }, { "epoch": 0.004872431135686037, "grad_norm": 0.74609375, "learning_rate": 0.0019984987996323087, "loss": 0.3427, "step": 2748 }, { "epoch": 0.004875977300995852, "grad_norm": 0.310546875, "learning_rate": 0.0019984953614159365, "loss": 0.3363, "step": 2750 }, { "epoch": 0.004879523466305667, "grad_norm": 0.2314453125, "learning_rate": 0.0019984919192700674, "loss": 0.2652, "step": 2752 }, { "epoch": 0.0048830696316154824, "grad_norm": 1.0625, "learning_rate": 0.0019984884731947166, "loss": 0.3352, "step": 2754 }, { "epoch": 0.004886615796925297, "grad_norm": 0.4921875, "learning_rate": 0.001998485023189899, "loss": 0.3254, "step": 2756 }, { "epoch": 0.004890161962235112, "grad_norm": 0.97265625, "learning_rate": 0.0019984815692556295, "loss": 0.4867, "step": 2758 }, { "epoch": 0.004893708127544928, "grad_norm": 0.765625, "learning_rate": 0.001998478111391924, "loss": 0.4473, "step": 2760 }, { "epoch": 0.004897254292854743, "grad_norm": 0.296875, "learning_rate": 0.0019984746495987967, "loss": 0.3472, "step": 2762 }, { "epoch": 0.004900800458164558, "grad_norm": 0.98046875, "learning_rate": 0.0019984711838762635, "loss": 0.4279, "step": 2764 }, { "epoch": 0.004904346623474373, "grad_norm": 3.09375, "learning_rate": 0.0019984677142243393, "loss": 0.3315, "step": 2766 }, { "epoch": 0.004907892788784189, "grad_norm": 0.734375, "learning_rate": 0.001998464240643039, "loss": 0.5489, "step": 2768 }, { "epoch": 0.004911438954094003, "grad_norm": 0.62890625, "learning_rate": 0.001998460763132378, "loss": 0.3425, "step": 2770 }, { "epoch": 0.004914985119403819, "grad_norm": 0.44140625, "learning_rate": 0.0019984572816923716, "loss": 0.4468, "step": 2772 }, { "epoch": 0.004918531284713634, "grad_norm": 0.3125, "learning_rate": 0.0019984537963230347, "loss": 0.2923, "step": 2774 }, { "epoch": 0.004922077450023449, "grad_norm": 0.83203125, "learning_rate": 0.001998450307024383, "loss": 0.3882, "step": 2776 }, { "epoch": 0.004925623615333264, "grad_norm": 0.78125, "learning_rate": 0.001998446813796432, "loss": 0.4952, "step": 2778 }, { "epoch": 0.004929169780643079, "grad_norm": 0.59375, "learning_rate": 0.0019984433166391957, "loss": 0.3895, "step": 2780 }, { "epoch": 0.004932715945952895, "grad_norm": 0.2890625, "learning_rate": 0.0019984398155526903, "loss": 0.2522, "step": 2782 }, { "epoch": 0.004936262111262709, "grad_norm": 0.330078125, "learning_rate": 0.001998436310536932, "loss": 0.2946, "step": 2784 }, { "epoch": 0.004939808276572525, "grad_norm": 0.72265625, "learning_rate": 0.001998432801591934, "loss": 0.3808, "step": 2786 }, { "epoch": 0.00494335444188234, "grad_norm": 0.4375, "learning_rate": 0.0019984292887177133, "loss": 0.4355, "step": 2788 }, { "epoch": 0.004946900607192155, "grad_norm": 1.984375, "learning_rate": 0.0019984257719142845, "loss": 0.4189, "step": 2790 }, { "epoch": 0.00495044677250197, "grad_norm": 0.6171875, "learning_rate": 0.0019984222511816633, "loss": 0.3627, "step": 2792 }, { "epoch": 0.004953992937811786, "grad_norm": 0.859375, "learning_rate": 0.001998418726519865, "loss": 0.3463, "step": 2794 }, { "epoch": 0.004957539103121601, "grad_norm": 0.37109375, "learning_rate": 0.0019984151979289054, "loss": 0.343, "step": 2796 }, { "epoch": 0.004961085268431416, "grad_norm": 1.1328125, "learning_rate": 0.0019984116654087995, "loss": 0.3639, "step": 2798 }, { "epoch": 0.004964631433741231, "grad_norm": 0.404296875, "learning_rate": 0.0019984081289595628, "loss": 0.3211, "step": 2800 }, { "epoch": 0.0049681775990510465, "grad_norm": 0.81640625, "learning_rate": 0.0019984045885812107, "loss": 0.4205, "step": 2802 }, { "epoch": 0.004971723764360861, "grad_norm": 0.4453125, "learning_rate": 0.0019984010442737586, "loss": 0.2752, "step": 2804 }, { "epoch": 0.004975269929670676, "grad_norm": 0.671875, "learning_rate": 0.0019983974960372224, "loss": 0.4363, "step": 2806 }, { "epoch": 0.004978816094980492, "grad_norm": 0.455078125, "learning_rate": 0.001998393943871618, "loss": 0.3035, "step": 2808 }, { "epoch": 0.004982362260290306, "grad_norm": 0.4609375, "learning_rate": 0.001998390387776959, "loss": 0.3992, "step": 2810 }, { "epoch": 0.004985908425600122, "grad_norm": 0.59765625, "learning_rate": 0.0019983868277532635, "loss": 0.4628, "step": 2812 }, { "epoch": 0.004989454590909937, "grad_norm": 0.9453125, "learning_rate": 0.001998383263800545, "loss": 0.3996, "step": 2814 }, { "epoch": 0.004993000756219753, "grad_norm": 0.255859375, "learning_rate": 0.0019983796959188206, "loss": 0.3718, "step": 2816 }, { "epoch": 0.004996546921529567, "grad_norm": 3.203125, "learning_rate": 0.001998376124108105, "loss": 0.4216, "step": 2818 }, { "epoch": 0.005000093086839383, "grad_norm": 1.2578125, "learning_rate": 0.001998372548368414, "loss": 0.3556, "step": 2820 }, { "epoch": 0.005003639252149198, "grad_norm": 0.26171875, "learning_rate": 0.001998368968699763, "loss": 0.2456, "step": 2822 }, { "epoch": 0.005007185417459013, "grad_norm": 1.7265625, "learning_rate": 0.0019983653851021687, "loss": 0.4176, "step": 2824 }, { "epoch": 0.005010731582768828, "grad_norm": 0.60546875, "learning_rate": 0.0019983617975756454, "loss": 0.3417, "step": 2826 }, { "epoch": 0.0050142777480786434, "grad_norm": 0.275390625, "learning_rate": 0.00199835820612021, "loss": 0.2935, "step": 2828 }, { "epoch": 0.005017823913388459, "grad_norm": 0.2470703125, "learning_rate": 0.001998354610735877, "loss": 0.2535, "step": 2830 }, { "epoch": 0.005021370078698273, "grad_norm": 0.6875, "learning_rate": 0.0019983510114226634, "loss": 0.2801, "step": 2832 }, { "epoch": 0.005024916244008089, "grad_norm": 0.38671875, "learning_rate": 0.001998347408180584, "loss": 0.2713, "step": 2834 }, { "epoch": 0.005028462409317904, "grad_norm": 0.54296875, "learning_rate": 0.001998343801009655, "loss": 0.3337, "step": 2836 }, { "epoch": 0.005032008574627719, "grad_norm": 0.33203125, "learning_rate": 0.001998340189909892, "loss": 0.2395, "step": 2838 }, { "epoch": 0.005035554739937534, "grad_norm": 0.388671875, "learning_rate": 0.0019983365748813104, "loss": 0.3079, "step": 2840 }, { "epoch": 0.00503910090524735, "grad_norm": 0.4765625, "learning_rate": 0.001998332955923927, "loss": 0.4155, "step": 2842 }, { "epoch": 0.005042647070557164, "grad_norm": 0.470703125, "learning_rate": 0.0019983293330377567, "loss": 0.3849, "step": 2844 }, { "epoch": 0.00504619323586698, "grad_norm": 0.373046875, "learning_rate": 0.001998325706222816, "loss": 0.3271, "step": 2846 }, { "epoch": 0.005049739401176795, "grad_norm": 0.7421875, "learning_rate": 0.00199832207547912, "loss": 0.2931, "step": 2848 }, { "epoch": 0.0050532855664866105, "grad_norm": 0.466796875, "learning_rate": 0.0019983184408066857, "loss": 0.3996, "step": 2850 }, { "epoch": 0.005056831731796425, "grad_norm": 0.63671875, "learning_rate": 0.001998314802205528, "loss": 0.2881, "step": 2852 }, { "epoch": 0.0050603778971062404, "grad_norm": 0.427734375, "learning_rate": 0.001998311159675663, "loss": 0.2577, "step": 2854 }, { "epoch": 0.005063924062416056, "grad_norm": 0.5625, "learning_rate": 0.0019983075132171068, "loss": 0.3623, "step": 2856 }, { "epoch": 0.00506747022772587, "grad_norm": 0.2294921875, "learning_rate": 0.0019983038628298756, "loss": 0.2591, "step": 2858 }, { "epoch": 0.005071016393035686, "grad_norm": 1.046875, "learning_rate": 0.001998300208513985, "loss": 0.3839, "step": 2860 }, { "epoch": 0.005074562558345501, "grad_norm": 0.267578125, "learning_rate": 0.001998296550269451, "loss": 0.3262, "step": 2862 }, { "epoch": 0.005078108723655317, "grad_norm": 0.283203125, "learning_rate": 0.0019982928880962897, "loss": 0.3527, "step": 2864 }, { "epoch": 0.005081654888965131, "grad_norm": 0.4375, "learning_rate": 0.0019982892219945173, "loss": 0.3176, "step": 2866 }, { "epoch": 0.005085201054274947, "grad_norm": 0.248046875, "learning_rate": 0.0019982855519641496, "loss": 0.2781, "step": 2868 }, { "epoch": 0.005088747219584762, "grad_norm": 0.306640625, "learning_rate": 0.0019982818780052026, "loss": 0.2864, "step": 2870 }, { "epoch": 0.005092293384894577, "grad_norm": 0.953125, "learning_rate": 0.0019982782001176924, "loss": 0.3155, "step": 2872 }, { "epoch": 0.005095839550204392, "grad_norm": 0.6015625, "learning_rate": 0.0019982745183016347, "loss": 0.4162, "step": 2874 }, { "epoch": 0.0050993857155142075, "grad_norm": 0.640625, "learning_rate": 0.001998270832557047, "loss": 0.3198, "step": 2876 }, { "epoch": 0.005102931880824022, "grad_norm": 0.546875, "learning_rate": 0.001998267142883944, "loss": 0.287, "step": 2878 }, { "epoch": 0.0051064780461338374, "grad_norm": 0.251953125, "learning_rate": 0.001998263449282342, "loss": 0.2977, "step": 2880 }, { "epoch": 0.005110024211443653, "grad_norm": 0.89453125, "learning_rate": 0.0019982597517522573, "loss": 0.321, "step": 2882 }, { "epoch": 0.005113570376753468, "grad_norm": 0.380859375, "learning_rate": 0.001998256050293707, "loss": 0.4265, "step": 2884 }, { "epoch": 0.005117116542063283, "grad_norm": 0.8203125, "learning_rate": 0.001998252344906706, "loss": 0.3992, "step": 2886 }, { "epoch": 0.005120662707373098, "grad_norm": 0.5, "learning_rate": 0.0019982486355912712, "loss": 0.3643, "step": 2888 }, { "epoch": 0.005124208872682914, "grad_norm": 0.7109375, "learning_rate": 0.0019982449223474186, "loss": 0.3402, "step": 2890 }, { "epoch": 0.005127755037992728, "grad_norm": 0.5703125, "learning_rate": 0.001998241205175164, "loss": 0.5101, "step": 2892 }, { "epoch": 0.005131301203302544, "grad_norm": 0.578125, "learning_rate": 0.001998237484074525, "loss": 0.3634, "step": 2894 }, { "epoch": 0.005134847368612359, "grad_norm": 0.58984375, "learning_rate": 0.0019982337590455164, "loss": 0.3343, "step": 2896 }, { "epoch": 0.0051383935339221745, "grad_norm": 0.40234375, "learning_rate": 0.001998230030088155, "loss": 0.4453, "step": 2898 }, { "epoch": 0.005141939699231989, "grad_norm": 1.2265625, "learning_rate": 0.0019982262972024576, "loss": 0.395, "step": 2900 }, { "epoch": 0.0051454858645418045, "grad_norm": 1.4921875, "learning_rate": 0.00199822256038844, "loss": 0.3722, "step": 2902 }, { "epoch": 0.00514903202985162, "grad_norm": 0.3046875, "learning_rate": 0.0019982188196461187, "loss": 0.3089, "step": 2904 }, { "epoch": 0.005152578195161434, "grad_norm": 0.6796875, "learning_rate": 0.00199821507497551, "loss": 0.3687, "step": 2906 }, { "epoch": 0.00515612436047125, "grad_norm": 0.373046875, "learning_rate": 0.0019982113263766303, "loss": 0.2594, "step": 2908 }, { "epoch": 0.005159670525781065, "grad_norm": 0.796875, "learning_rate": 0.001998207573849496, "loss": 0.4331, "step": 2910 }, { "epoch": 0.00516321669109088, "grad_norm": 0.220703125, "learning_rate": 0.0019982038173941234, "loss": 0.2379, "step": 2912 }, { "epoch": 0.005166762856400695, "grad_norm": 0.546875, "learning_rate": 0.001998200057010529, "loss": 0.395, "step": 2914 }, { "epoch": 0.005170309021710511, "grad_norm": 1.046875, "learning_rate": 0.0019981962926987292, "loss": 0.3564, "step": 2916 }, { "epoch": 0.005173855187020326, "grad_norm": 0.353515625, "learning_rate": 0.001998192524458741, "loss": 0.3461, "step": 2918 }, { "epoch": 0.005177401352330141, "grad_norm": 0.25390625, "learning_rate": 0.0019981887522905802, "loss": 0.298, "step": 2920 }, { "epoch": 0.005180947517639956, "grad_norm": 0.4375, "learning_rate": 0.0019981849761942635, "loss": 0.2868, "step": 2922 }, { "epoch": 0.0051844936829497715, "grad_norm": 0.890625, "learning_rate": 0.0019981811961698077, "loss": 0.3366, "step": 2924 }, { "epoch": 0.005188039848259586, "grad_norm": 0.349609375, "learning_rate": 0.001998177412217229, "loss": 0.4207, "step": 2926 }, { "epoch": 0.0051915860135694014, "grad_norm": 0.361328125, "learning_rate": 0.0019981736243365436, "loss": 0.2418, "step": 2928 }, { "epoch": 0.005195132178879217, "grad_norm": 0.52734375, "learning_rate": 0.0019981698325277687, "loss": 0.3548, "step": 2930 }, { "epoch": 0.005198678344189032, "grad_norm": 0.953125, "learning_rate": 0.0019981660367909206, "loss": 0.505, "step": 2932 }, { "epoch": 0.005202224509498847, "grad_norm": 2.6875, "learning_rate": 0.0019981622371260162, "loss": 0.3292, "step": 2934 }, { "epoch": 0.005205770674808662, "grad_norm": 0.59375, "learning_rate": 0.001998158433533072, "loss": 0.3489, "step": 2936 }, { "epoch": 0.005209316840118478, "grad_norm": 0.75, "learning_rate": 0.0019981546260121044, "loss": 0.3117, "step": 2938 }, { "epoch": 0.005212863005428292, "grad_norm": 1.0234375, "learning_rate": 0.00199815081456313, "loss": 0.4501, "step": 2940 }, { "epoch": 0.005216409170738108, "grad_norm": 0.314453125, "learning_rate": 0.001998146999186166, "loss": 0.2799, "step": 2942 }, { "epoch": 0.005219955336047923, "grad_norm": 0.49609375, "learning_rate": 0.0019981431798812284, "loss": 0.2491, "step": 2944 }, { "epoch": 0.005223501501357738, "grad_norm": 0.94921875, "learning_rate": 0.001998139356648334, "loss": 0.3046, "step": 2946 }, { "epoch": 0.005227047666667553, "grad_norm": 0.37890625, "learning_rate": 0.0019981355294875, "loss": 0.3474, "step": 2948 }, { "epoch": 0.0052305938319773685, "grad_norm": 0.60546875, "learning_rate": 0.001998131698398743, "loss": 0.3788, "step": 2950 }, { "epoch": 0.005234139997287184, "grad_norm": 0.5078125, "learning_rate": 0.0019981278633820795, "loss": 0.3671, "step": 2952 }, { "epoch": 0.0052376861625969984, "grad_norm": 0.35546875, "learning_rate": 0.0019981240244375266, "loss": 0.3066, "step": 2954 }, { "epoch": 0.005241232327906814, "grad_norm": 1.1328125, "learning_rate": 0.001998120181565101, "loss": 0.4496, "step": 2956 }, { "epoch": 0.005244778493216629, "grad_norm": 0.3515625, "learning_rate": 0.001998116334764819, "loss": 0.2959, "step": 2958 }, { "epoch": 0.005248324658526444, "grad_norm": 0.48828125, "learning_rate": 0.0019981124840366985, "loss": 0.3182, "step": 2960 }, { "epoch": 0.005251870823836259, "grad_norm": 1.125, "learning_rate": 0.001998108629380755, "loss": 0.3046, "step": 2962 }, { "epoch": 0.005255416989146075, "grad_norm": 0.97265625, "learning_rate": 0.001998104770797006, "loss": 0.3488, "step": 2964 }, { "epoch": 0.00525896315445589, "grad_norm": 0.380859375, "learning_rate": 0.0019981009082854695, "loss": 0.2864, "step": 2966 }, { "epoch": 0.005262509319765705, "grad_norm": 0.27734375, "learning_rate": 0.00199809704184616, "loss": 0.3037, "step": 2968 }, { "epoch": 0.00526605548507552, "grad_norm": 0.318359375, "learning_rate": 0.0019980931714790964, "loss": 0.3639, "step": 2970 }, { "epoch": 0.0052696016503853355, "grad_norm": 0.69921875, "learning_rate": 0.0019980892971842947, "loss": 0.3356, "step": 2972 }, { "epoch": 0.00527314781569515, "grad_norm": 1.7734375, "learning_rate": 0.0019980854189617725, "loss": 0.4104, "step": 2974 }, { "epoch": 0.0052766939810049655, "grad_norm": 0.4921875, "learning_rate": 0.001998081536811546, "loss": 0.2835, "step": 2976 }, { "epoch": 0.005280240146314781, "grad_norm": 0.2353515625, "learning_rate": 0.0019980776507336324, "loss": 0.2296, "step": 2978 }, { "epoch": 0.0052837863116245954, "grad_norm": 0.265625, "learning_rate": 0.0019980737607280486, "loss": 0.3178, "step": 2980 }, { "epoch": 0.005287332476934411, "grad_norm": 1.03125, "learning_rate": 0.0019980698667948125, "loss": 0.2921, "step": 2982 }, { "epoch": 0.005290878642244226, "grad_norm": 0.4609375, "learning_rate": 0.00199806596893394, "loss": 0.3058, "step": 2984 }, { "epoch": 0.005294424807554042, "grad_norm": 0.291015625, "learning_rate": 0.001998062067145449, "loss": 0.354, "step": 2986 }, { "epoch": 0.005297970972863856, "grad_norm": 0.388671875, "learning_rate": 0.0019980581614293556, "loss": 0.2857, "step": 2988 }, { "epoch": 0.005301517138173672, "grad_norm": 0.296875, "learning_rate": 0.001998054251785678, "loss": 0.2665, "step": 2990 }, { "epoch": 0.005305063303483487, "grad_norm": 0.828125, "learning_rate": 0.0019980503382144325, "loss": 0.3523, "step": 2992 }, { "epoch": 0.005308609468793302, "grad_norm": 0.54296875, "learning_rate": 0.0019980464207156367, "loss": 0.2823, "step": 2994 }, { "epoch": 0.005312155634103117, "grad_norm": 0.4375, "learning_rate": 0.001998042499289307, "loss": 0.2396, "step": 2996 }, { "epoch": 0.0053157017994129325, "grad_norm": 0.2890625, "learning_rate": 0.001998038573935462, "loss": 0.3331, "step": 2998 }, { "epoch": 0.005319247964722748, "grad_norm": 0.2255859375, "learning_rate": 0.001998034644654117, "loss": 0.3299, "step": 3000 }, { "epoch": 0.0053227941300325625, "grad_norm": 0.44140625, "learning_rate": 0.0019980307114452903, "loss": 0.2866, "step": 3002 }, { "epoch": 0.005326340295342378, "grad_norm": 0.369140625, "learning_rate": 0.0019980267743089993, "loss": 0.2317, "step": 3004 }, { "epoch": 0.005329886460652193, "grad_norm": 0.310546875, "learning_rate": 0.0019980228332452605, "loss": 0.3696, "step": 3006 }, { "epoch": 0.005333432625962008, "grad_norm": 0.44140625, "learning_rate": 0.001998018888254091, "loss": 0.3672, "step": 3008 }, { "epoch": 0.005336978791271823, "grad_norm": 0.3984375, "learning_rate": 0.0019980149393355093, "loss": 0.3108, "step": 3010 }, { "epoch": 0.005340524956581639, "grad_norm": 0.3828125, "learning_rate": 0.0019980109864895317, "loss": 0.7665, "step": 3012 }, { "epoch": 0.005344071121891453, "grad_norm": 3.28125, "learning_rate": 0.0019980070297161753, "loss": 0.6211, "step": 3014 }, { "epoch": 0.005347617287201269, "grad_norm": 0.337890625, "learning_rate": 0.001998003069015458, "loss": 0.3374, "step": 3016 }, { "epoch": 0.005351163452511084, "grad_norm": 0.361328125, "learning_rate": 0.0019979991043873975, "loss": 0.4111, "step": 3018 }, { "epoch": 0.0053547096178208995, "grad_norm": 0.58984375, "learning_rate": 0.0019979951358320095, "loss": 0.3796, "step": 3020 }, { "epoch": 0.005358255783130714, "grad_norm": 0.478515625, "learning_rate": 0.001997991163349313, "loss": 0.3128, "step": 3022 }, { "epoch": 0.0053618019484405295, "grad_norm": 0.86328125, "learning_rate": 0.001997987186939325, "loss": 0.5398, "step": 3024 }, { "epoch": 0.005365348113750345, "grad_norm": 0.51171875, "learning_rate": 0.001997983206602062, "loss": 0.3081, "step": 3026 }, { "epoch": 0.0053688942790601595, "grad_norm": 0.52734375, "learning_rate": 0.0019979792223375425, "loss": 0.3187, "step": 3028 }, { "epoch": 0.005372440444369975, "grad_norm": 2.3125, "learning_rate": 0.0019979752341457834, "loss": 0.5208, "step": 3030 }, { "epoch": 0.00537598660967979, "grad_norm": 0.380859375, "learning_rate": 0.001997971242026802, "loss": 0.3044, "step": 3032 }, { "epoch": 0.005379532774989606, "grad_norm": 0.265625, "learning_rate": 0.0019979672459806163, "loss": 0.301, "step": 3034 }, { "epoch": 0.00538307894029942, "grad_norm": 1.7734375, "learning_rate": 0.0019979632460072434, "loss": 0.5821, "step": 3036 }, { "epoch": 0.005386625105609236, "grad_norm": 0.2373046875, "learning_rate": 0.0019979592421067007, "loss": 0.2779, "step": 3038 }, { "epoch": 0.005390171270919051, "grad_norm": 0.38671875, "learning_rate": 0.001997955234279006, "loss": 0.297, "step": 3040 }, { "epoch": 0.005393717436228866, "grad_norm": 0.98828125, "learning_rate": 0.0019979512225241766, "loss": 0.3257, "step": 3042 }, { "epoch": 0.005397263601538681, "grad_norm": 0.228515625, "learning_rate": 0.0019979472068422303, "loss": 0.3346, "step": 3044 }, { "epoch": 0.0054008097668484965, "grad_norm": 0.474609375, "learning_rate": 0.0019979431872331845, "loss": 0.3538, "step": 3046 }, { "epoch": 0.005404355932158311, "grad_norm": 0.421875, "learning_rate": 0.0019979391636970566, "loss": 0.2874, "step": 3048 }, { "epoch": 0.0054079020974681265, "grad_norm": 0.31640625, "learning_rate": 0.0019979351362338646, "loss": 0.3763, "step": 3050 }, { "epoch": 0.005411448262777942, "grad_norm": 0.484375, "learning_rate": 0.001997931104843626, "loss": 0.4304, "step": 3052 }, { "epoch": 0.005414994428087757, "grad_norm": 0.1826171875, "learning_rate": 0.001997927069526358, "loss": 0.3974, "step": 3054 }, { "epoch": 0.005418540593397572, "grad_norm": 0.66796875, "learning_rate": 0.0019979230302820785, "loss": 0.396, "step": 3056 }, { "epoch": 0.005422086758707387, "grad_norm": 2.40625, "learning_rate": 0.0019979189871108054, "loss": 0.262, "step": 3058 }, { "epoch": 0.005425632924017203, "grad_norm": 0.416015625, "learning_rate": 0.0019979149400125564, "loss": 0.3403, "step": 3060 }, { "epoch": 0.005429179089327017, "grad_norm": 0.32421875, "learning_rate": 0.001997910888987349, "loss": 0.407, "step": 3062 }, { "epoch": 0.005432725254636833, "grad_norm": 2.859375, "learning_rate": 0.001997906834035201, "loss": 0.3674, "step": 3064 }, { "epoch": 0.005436271419946648, "grad_norm": 0.427734375, "learning_rate": 0.0019979027751561296, "loss": 0.376, "step": 3066 }, { "epoch": 0.0054398175852564635, "grad_norm": 0.2412109375, "learning_rate": 0.0019978987123501534, "loss": 0.3365, "step": 3068 }, { "epoch": 0.005443363750566278, "grad_norm": 0.53515625, "learning_rate": 0.00199789464561729, "loss": 0.3183, "step": 3070 }, { "epoch": 0.0054469099158760935, "grad_norm": 1.2578125, "learning_rate": 0.001997890574957557, "loss": 0.3286, "step": 3072 }, { "epoch": 0.005450456081185909, "grad_norm": 0.40625, "learning_rate": 0.001997886500370971, "loss": 0.2991, "step": 3074 }, { "epoch": 0.0054540022464957235, "grad_norm": 0.255859375, "learning_rate": 0.0019978824218575522, "loss": 0.2764, "step": 3076 }, { "epoch": 0.005457548411805539, "grad_norm": 0.86328125, "learning_rate": 0.001997878339417317, "loss": 0.3843, "step": 3078 }, { "epoch": 0.005461094577115354, "grad_norm": 2.578125, "learning_rate": 0.001997874253050283, "loss": 0.3829, "step": 3080 }, { "epoch": 0.005464640742425169, "grad_norm": 0.3203125, "learning_rate": 0.0019978701627564694, "loss": 0.2504, "step": 3082 }, { "epoch": 0.005468186907734984, "grad_norm": 4.03125, "learning_rate": 0.0019978660685358927, "loss": 0.372, "step": 3084 }, { "epoch": 0.0054717330730448, "grad_norm": 0.435546875, "learning_rate": 0.0019978619703885712, "loss": 0.2704, "step": 3086 }, { "epoch": 0.005475279238354615, "grad_norm": 2.328125, "learning_rate": 0.0019978578683145236, "loss": 0.3223, "step": 3088 }, { "epoch": 0.00547882540366443, "grad_norm": 0.439453125, "learning_rate": 0.0019978537623137668, "loss": 0.2812, "step": 3090 }, { "epoch": 0.005482371568974245, "grad_norm": 0.318359375, "learning_rate": 0.0019978496523863193, "loss": 0.2967, "step": 3092 }, { "epoch": 0.0054859177342840605, "grad_norm": 0.439453125, "learning_rate": 0.001997845538532199, "loss": 0.2953, "step": 3094 }, { "epoch": 0.005489463899593875, "grad_norm": 0.6875, "learning_rate": 0.001997841420751424, "loss": 0.222, "step": 3096 }, { "epoch": 0.0054930100649036905, "grad_norm": 0.48046875, "learning_rate": 0.001997837299044012, "loss": 0.3668, "step": 3098 }, { "epoch": 0.005496556230213506, "grad_norm": 0.3515625, "learning_rate": 0.001997833173409981, "loss": 0.3087, "step": 3100 }, { "epoch": 0.005500102395523321, "grad_norm": 0.443359375, "learning_rate": 0.001997829043849349, "loss": 0.3671, "step": 3102 }, { "epoch": 0.005503648560833136, "grad_norm": 0.306640625, "learning_rate": 0.001997824910362135, "loss": 0.2986, "step": 3104 }, { "epoch": 0.005507194726142951, "grad_norm": 0.369140625, "learning_rate": 0.001997820772948356, "loss": 0.3142, "step": 3106 }, { "epoch": 0.005510740891452767, "grad_norm": 1.2578125, "learning_rate": 0.0019978166316080305, "loss": 0.324, "step": 3108 }, { "epoch": 0.005514287056762581, "grad_norm": 0.5703125, "learning_rate": 0.0019978124863411764, "loss": 0.2928, "step": 3110 }, { "epoch": 0.005517833222072397, "grad_norm": 0.494140625, "learning_rate": 0.001997808337147812, "loss": 0.2638, "step": 3112 }, { "epoch": 0.005521379387382212, "grad_norm": 0.63671875, "learning_rate": 0.001997804184027956, "loss": 0.3343, "step": 3114 }, { "epoch": 0.005524925552692027, "grad_norm": 0.263671875, "learning_rate": 0.0019978000269816254, "loss": 0.2812, "step": 3116 }, { "epoch": 0.005528471718001842, "grad_norm": 0.349609375, "learning_rate": 0.001997795866008839, "loss": 0.3444, "step": 3118 }, { "epoch": 0.0055320178833116575, "grad_norm": 0.3125, "learning_rate": 0.0019977917011096153, "loss": 0.2748, "step": 3120 }, { "epoch": 0.005535564048621473, "grad_norm": 0.53515625, "learning_rate": 0.0019977875322839717, "loss": 0.3103, "step": 3122 }, { "epoch": 0.0055391102139312875, "grad_norm": 0.294921875, "learning_rate": 0.001997783359531927, "loss": 0.2796, "step": 3124 }, { "epoch": 0.005542656379241103, "grad_norm": 1.2578125, "learning_rate": 0.0019977791828535, "loss": 0.3919, "step": 3126 }, { "epoch": 0.005546202544550918, "grad_norm": 1.3515625, "learning_rate": 0.001997775002248708, "loss": 0.5206, "step": 3128 }, { "epoch": 0.005549748709860733, "grad_norm": 1.6171875, "learning_rate": 0.0019977708177175697, "loss": 0.3797, "step": 3130 }, { "epoch": 0.005553294875170548, "grad_norm": 0.8984375, "learning_rate": 0.001997766629260103, "loss": 0.2913, "step": 3132 }, { "epoch": 0.005556841040480364, "grad_norm": 0.224609375, "learning_rate": 0.001997762436876327, "loss": 0.2958, "step": 3134 }, { "epoch": 0.005560387205790179, "grad_norm": 0.5078125, "learning_rate": 0.0019977582405662593, "loss": 0.3557, "step": 3136 }, { "epoch": 0.005563933371099994, "grad_norm": 0.875, "learning_rate": 0.0019977540403299182, "loss": 0.5528, "step": 3138 }, { "epoch": 0.005567479536409809, "grad_norm": 0.625, "learning_rate": 0.001997749836167323, "loss": 0.4444, "step": 3140 }, { "epoch": 0.0055710257017196245, "grad_norm": 3.328125, "learning_rate": 0.0019977456280784915, "loss": 0.4219, "step": 3142 }, { "epoch": 0.005574571867029439, "grad_norm": 0.98828125, "learning_rate": 0.001997741416063442, "loss": 0.2498, "step": 3144 }, { "epoch": 0.0055781180323392545, "grad_norm": 0.318359375, "learning_rate": 0.0019977372001221926, "loss": 0.3011, "step": 3146 }, { "epoch": 0.00558166419764907, "grad_norm": 1.0859375, "learning_rate": 0.0019977329802547627, "loss": 0.2365, "step": 3148 }, { "epoch": 0.0055852103629588845, "grad_norm": 2.203125, "learning_rate": 0.00199772875646117, "loss": 0.6416, "step": 3150 }, { "epoch": 0.0055887565282687, "grad_norm": 0.640625, "learning_rate": 0.0019977245287414328, "loss": 0.3236, "step": 3152 }, { "epoch": 0.005592302693578515, "grad_norm": 0.3046875, "learning_rate": 0.0019977202970955705, "loss": 0.282, "step": 3154 }, { "epoch": 0.005595848858888331, "grad_norm": 3.296875, "learning_rate": 0.001997716061523601, "loss": 0.4333, "step": 3156 }, { "epoch": 0.005599395024198145, "grad_norm": 1.515625, "learning_rate": 0.001997711822025543, "loss": 0.4657, "step": 3158 }, { "epoch": 0.005602941189507961, "grad_norm": 0.50390625, "learning_rate": 0.0019977075786014147, "loss": 0.2869, "step": 3160 }, { "epoch": 0.005606487354817776, "grad_norm": 0.6875, "learning_rate": 0.0019977033312512348, "loss": 0.2784, "step": 3162 }, { "epoch": 0.005610033520127591, "grad_norm": 0.78515625, "learning_rate": 0.0019976990799750217, "loss": 0.3246, "step": 3164 }, { "epoch": 0.005613579685437406, "grad_norm": 0.3828125, "learning_rate": 0.001997694824772795, "loss": 0.2738, "step": 3166 }, { "epoch": 0.0056171258507472215, "grad_norm": 0.6484375, "learning_rate": 0.0019976905656445727, "loss": 0.2708, "step": 3168 }, { "epoch": 0.005620672016057037, "grad_norm": 1.34375, "learning_rate": 0.0019976863025903723, "loss": 0.4467, "step": 3170 }, { "epoch": 0.0056242181813668515, "grad_norm": 0.1953125, "learning_rate": 0.001997682035610214, "loss": 0.2979, "step": 3172 }, { "epoch": 0.005627764346676667, "grad_norm": 0.1767578125, "learning_rate": 0.0019976777647041162, "loss": 0.207, "step": 3174 }, { "epoch": 0.005631310511986482, "grad_norm": 0.58203125, "learning_rate": 0.001997673489872097, "loss": 0.3519, "step": 3176 }, { "epoch": 0.005634856677296297, "grad_norm": 0.474609375, "learning_rate": 0.0019976692111141753, "loss": 0.2799, "step": 3178 }, { "epoch": 0.005638402842606112, "grad_norm": 0.357421875, "learning_rate": 0.0019976649284303705, "loss": 0.2251, "step": 3180 }, { "epoch": 0.005641949007915928, "grad_norm": 0.2197265625, "learning_rate": 0.0019976606418207004, "loss": 0.3615, "step": 3182 }, { "epoch": 0.005645495173225742, "grad_norm": 0.3828125, "learning_rate": 0.0019976563512851837, "loss": 0.342, "step": 3184 }, { "epoch": 0.005649041338535558, "grad_norm": 1.5234375, "learning_rate": 0.00199765205682384, "loss": 0.3305, "step": 3186 }, { "epoch": 0.005652587503845373, "grad_norm": 0.69921875, "learning_rate": 0.001997647758436687, "loss": 0.2968, "step": 3188 }, { "epoch": 0.0056561336691551885, "grad_norm": 0.4375, "learning_rate": 0.0019976434561237446, "loss": 0.3484, "step": 3190 }, { "epoch": 0.005659679834465003, "grad_norm": 0.404296875, "learning_rate": 0.001997639149885031, "loss": 0.2637, "step": 3192 }, { "epoch": 0.0056632259997748185, "grad_norm": 0.35546875, "learning_rate": 0.001997634839720565, "loss": 0.3236, "step": 3194 }, { "epoch": 0.005666772165084634, "grad_norm": 0.41015625, "learning_rate": 0.0019976305256303663, "loss": 0.3569, "step": 3196 }, { "epoch": 0.0056703183303944485, "grad_norm": 4.46875, "learning_rate": 0.0019976262076144523, "loss": 0.2971, "step": 3198 }, { "epoch": 0.005673864495704264, "grad_norm": 1.3515625, "learning_rate": 0.001997621885672843, "loss": 0.3112, "step": 3200 }, { "epoch": 0.005677410661014079, "grad_norm": 1.6953125, "learning_rate": 0.001997617559805557, "loss": 0.5851, "step": 3202 }, { "epoch": 0.005680956826323895, "grad_norm": 1.5625, "learning_rate": 0.001997613230012613, "loss": 0.3979, "step": 3204 }, { "epoch": 0.005684502991633709, "grad_norm": 0.765625, "learning_rate": 0.001997608896294031, "loss": 0.4134, "step": 3206 }, { "epoch": 0.005688049156943525, "grad_norm": 0.5390625, "learning_rate": 0.001997604558649828, "loss": 0.3292, "step": 3208 }, { "epoch": 0.00569159532225334, "grad_norm": 0.41015625, "learning_rate": 0.001997600217080024, "loss": 0.3304, "step": 3210 }, { "epoch": 0.005695141487563155, "grad_norm": 0.26953125, "learning_rate": 0.0019975958715846387, "loss": 0.3288, "step": 3212 }, { "epoch": 0.00569868765287297, "grad_norm": 0.796875, "learning_rate": 0.0019975915221636903, "loss": 0.3124, "step": 3214 }, { "epoch": 0.0057022338181827855, "grad_norm": 0.263671875, "learning_rate": 0.001997587168817198, "loss": 0.2574, "step": 3216 }, { "epoch": 0.0057057799834926, "grad_norm": 0.27734375, "learning_rate": 0.0019975828115451804, "loss": 0.3256, "step": 3218 }, { "epoch": 0.0057093261488024155, "grad_norm": 2.1875, "learning_rate": 0.0019975784503476575, "loss": 0.3901, "step": 3220 }, { "epoch": 0.005712872314112231, "grad_norm": 2.765625, "learning_rate": 0.0019975740852246474, "loss": 0.3555, "step": 3222 }, { "epoch": 0.005716418479422046, "grad_norm": 0.53515625, "learning_rate": 0.00199756971617617, "loss": 0.3528, "step": 3224 }, { "epoch": 0.005719964644731861, "grad_norm": 1.625, "learning_rate": 0.0019975653432022437, "loss": 0.3324, "step": 3226 }, { "epoch": 0.005723510810041676, "grad_norm": 0.396484375, "learning_rate": 0.001997560966302888, "loss": 0.2853, "step": 3228 }, { "epoch": 0.005727056975351492, "grad_norm": 1.6953125, "learning_rate": 0.001997556585478122, "loss": 0.3712, "step": 3230 }, { "epoch": 0.005730603140661306, "grad_norm": 0.3828125, "learning_rate": 0.001997552200727965, "loss": 0.3184, "step": 3232 }, { "epoch": 0.005734149305971122, "grad_norm": 0.390625, "learning_rate": 0.001997547812052436, "loss": 0.3259, "step": 3234 }, { "epoch": 0.005737695471280937, "grad_norm": 0.58203125, "learning_rate": 0.0019975434194515543, "loss": 0.3406, "step": 3236 }, { "epoch": 0.0057412416365907526, "grad_norm": 0.61328125, "learning_rate": 0.001997539022925339, "loss": 0.2699, "step": 3238 }, { "epoch": 0.005744787801900567, "grad_norm": 0.5, "learning_rate": 0.001997534622473809, "loss": 0.2815, "step": 3240 }, { "epoch": 0.0057483339672103825, "grad_norm": 0.52734375, "learning_rate": 0.0019975302180969844, "loss": 0.2411, "step": 3242 }, { "epoch": 0.005751880132520198, "grad_norm": 0.2421875, "learning_rate": 0.001997525809794884, "loss": 0.3577, "step": 3244 }, { "epoch": 0.0057554262978300125, "grad_norm": 0.46484375, "learning_rate": 0.0019975213975675266, "loss": 0.3507, "step": 3246 }, { "epoch": 0.005758972463139828, "grad_norm": 0.361328125, "learning_rate": 0.0019975169814149324, "loss": 0.2854, "step": 3248 }, { "epoch": 0.005762518628449643, "grad_norm": 0.60546875, "learning_rate": 0.00199751256133712, "loss": 0.359, "step": 3250 }, { "epoch": 0.005766064793759458, "grad_norm": 0.455078125, "learning_rate": 0.001997508137334109, "loss": 0.2627, "step": 3252 }, { "epoch": 0.005769610959069273, "grad_norm": 0.3203125, "learning_rate": 0.001997503709405919, "loss": 0.3197, "step": 3254 }, { "epoch": 0.005773157124379089, "grad_norm": 0.384765625, "learning_rate": 0.001997499277552569, "loss": 0.2707, "step": 3256 }, { "epoch": 0.005776703289688904, "grad_norm": 0.8125, "learning_rate": 0.0019974948417740782, "loss": 0.2898, "step": 3258 }, { "epoch": 0.005780249454998719, "grad_norm": 0.33984375, "learning_rate": 0.001997490402070466, "loss": 0.2489, "step": 3260 }, { "epoch": 0.005783795620308534, "grad_norm": 6.1875, "learning_rate": 0.001997485958441753, "loss": 0.3204, "step": 3262 }, { "epoch": 0.0057873417856183496, "grad_norm": 0.400390625, "learning_rate": 0.001997481510887957, "loss": 0.2608, "step": 3264 }, { "epoch": 0.005790887950928164, "grad_norm": 0.296875, "learning_rate": 0.001997477059409099, "loss": 0.3455, "step": 3266 }, { "epoch": 0.0057944341162379795, "grad_norm": 0.55078125, "learning_rate": 0.001997472604005197, "loss": 0.2793, "step": 3268 }, { "epoch": 0.005797980281547795, "grad_norm": 0.25390625, "learning_rate": 0.001997468144676271, "loss": 0.2685, "step": 3270 }, { "epoch": 0.00580152644685761, "grad_norm": 0.67578125, "learning_rate": 0.0019974636814223414, "loss": 0.302, "step": 3272 }, { "epoch": 0.005805072612167425, "grad_norm": 0.330078125, "learning_rate": 0.0019974592142434264, "loss": 0.3065, "step": 3274 }, { "epoch": 0.00580861877747724, "grad_norm": 0.419921875, "learning_rate": 0.001997454743139546, "loss": 0.2545, "step": 3276 }, { "epoch": 0.005812164942787056, "grad_norm": 0.37890625, "learning_rate": 0.0019974502681107203, "loss": 0.2476, "step": 3278 }, { "epoch": 0.00581571110809687, "grad_norm": 0.294921875, "learning_rate": 0.001997445789156968, "loss": 0.3519, "step": 3280 }, { "epoch": 0.005819257273406686, "grad_norm": 0.287109375, "learning_rate": 0.0019974413062783095, "loss": 0.3906, "step": 3282 }, { "epoch": 0.005822803438716501, "grad_norm": 0.41796875, "learning_rate": 0.0019974368194747637, "loss": 0.2944, "step": 3284 }, { "epoch": 0.005826349604026316, "grad_norm": 0.439453125, "learning_rate": 0.001997432328746351, "loss": 0.2693, "step": 3286 }, { "epoch": 0.005829895769336131, "grad_norm": 0.322265625, "learning_rate": 0.00199742783409309, "loss": 0.3522, "step": 3288 }, { "epoch": 0.0058334419346459465, "grad_norm": 0.3125, "learning_rate": 0.0019974233355150015, "loss": 0.3537, "step": 3290 }, { "epoch": 0.005836988099955762, "grad_norm": 0.341796875, "learning_rate": 0.0019974188330121045, "loss": 0.2557, "step": 3292 }, { "epoch": 0.0058405342652655765, "grad_norm": 0.6953125, "learning_rate": 0.0019974143265844187, "loss": 0.2975, "step": 3294 }, { "epoch": 0.005844080430575392, "grad_norm": 0.52734375, "learning_rate": 0.0019974098162319634, "loss": 0.2782, "step": 3296 }, { "epoch": 0.005847626595885207, "grad_norm": 0.6015625, "learning_rate": 0.001997405301954759, "loss": 0.3137, "step": 3298 }, { "epoch": 0.005851172761195022, "grad_norm": 2.28125, "learning_rate": 0.001997400783752826, "loss": 0.5785, "step": 3300 }, { "epoch": 0.005854718926504837, "grad_norm": 0.65234375, "learning_rate": 0.0019973962616261823, "loss": 0.3086, "step": 3302 }, { "epoch": 0.005858265091814653, "grad_norm": 0.447265625, "learning_rate": 0.001997391735574849, "loss": 0.3966, "step": 3304 }, { "epoch": 0.005861811257124468, "grad_norm": 0.314453125, "learning_rate": 0.0019973872055988454, "loss": 0.2754, "step": 3306 }, { "epoch": 0.005865357422434283, "grad_norm": 0.390625, "learning_rate": 0.001997382671698192, "loss": 0.3123, "step": 3308 }, { "epoch": 0.005868903587744098, "grad_norm": 0.375, "learning_rate": 0.0019973781338729073, "loss": 0.6683, "step": 3310 }, { "epoch": 0.005872449753053914, "grad_norm": 1.0859375, "learning_rate": 0.0019973735921230123, "loss": 0.3723, "step": 3312 }, { "epoch": 0.005875995918363728, "grad_norm": 0.546875, "learning_rate": 0.001997369046448526, "loss": 0.2932, "step": 3314 }, { "epoch": 0.0058795420836735435, "grad_norm": 0.6640625, "learning_rate": 0.0019973644968494693, "loss": 0.4134, "step": 3316 }, { "epoch": 0.005883088248983359, "grad_norm": 0.69921875, "learning_rate": 0.001997359943325861, "loss": 0.4393, "step": 3318 }, { "epoch": 0.0058866344142931735, "grad_norm": 0.4375, "learning_rate": 0.001997355385877722, "loss": 0.3295, "step": 3320 }, { "epoch": 0.005890180579602989, "grad_norm": 0.322265625, "learning_rate": 0.0019973508245050716, "loss": 0.3216, "step": 3322 }, { "epoch": 0.005893726744912804, "grad_norm": 0.5, "learning_rate": 0.00199734625920793, "loss": 0.3491, "step": 3324 }, { "epoch": 0.00589727291022262, "grad_norm": 0.263671875, "learning_rate": 0.0019973416899863173, "loss": 0.2758, "step": 3326 }, { "epoch": 0.005900819075532434, "grad_norm": 0.353515625, "learning_rate": 0.0019973371168402533, "loss": 0.275, "step": 3328 }, { "epoch": 0.00590436524084225, "grad_norm": 0.59375, "learning_rate": 0.0019973325397697576, "loss": 0.5971, "step": 3330 }, { "epoch": 0.005907911406152065, "grad_norm": 0.38671875, "learning_rate": 0.001997327958774851, "loss": 0.3286, "step": 3332 }, { "epoch": 0.00591145757146188, "grad_norm": 0.6875, "learning_rate": 0.0019973233738555525, "loss": 0.3469, "step": 3334 }, { "epoch": 0.005915003736771695, "grad_norm": 0.404296875, "learning_rate": 0.0019973187850118833, "loss": 0.3213, "step": 3336 }, { "epoch": 0.0059185499020815106, "grad_norm": 1.71875, "learning_rate": 0.001997314192243863, "loss": 0.4108, "step": 3338 }, { "epoch": 0.005922096067391325, "grad_norm": 0.46875, "learning_rate": 0.0019973095955515114, "loss": 0.2918, "step": 3340 }, { "epoch": 0.0059256422327011405, "grad_norm": 0.40234375, "learning_rate": 0.001997304994934849, "loss": 0.2826, "step": 3342 }, { "epoch": 0.005929188398010956, "grad_norm": 0.283203125, "learning_rate": 0.0019973003903938956, "loss": 0.305, "step": 3344 }, { "epoch": 0.005932734563320771, "grad_norm": 0.330078125, "learning_rate": 0.0019972957819286716, "loss": 0.288, "step": 3346 }, { "epoch": 0.005936280728630586, "grad_norm": 0.28515625, "learning_rate": 0.0019972911695391973, "loss": 0.2985, "step": 3348 }, { "epoch": 0.005939826893940401, "grad_norm": 4.03125, "learning_rate": 0.0019972865532254924, "loss": 0.3797, "step": 3350 }, { "epoch": 0.005943373059250217, "grad_norm": 0.349609375, "learning_rate": 0.0019972819329875774, "loss": 0.2655, "step": 3352 }, { "epoch": 0.005946919224560031, "grad_norm": 0.2734375, "learning_rate": 0.001997277308825472, "loss": 0.2296, "step": 3354 }, { "epoch": 0.005950465389869847, "grad_norm": 0.6796875, "learning_rate": 0.0019972726807391977, "loss": 0.3465, "step": 3356 }, { "epoch": 0.005954011555179662, "grad_norm": 0.396484375, "learning_rate": 0.001997268048728773, "loss": 0.268, "step": 3358 }, { "epoch": 0.005957557720489478, "grad_norm": 0.28515625, "learning_rate": 0.00199726341279422, "loss": 0.2914, "step": 3360 }, { "epoch": 0.005961103885799292, "grad_norm": 0.330078125, "learning_rate": 0.001997258772935557, "loss": 0.2261, "step": 3362 }, { "epoch": 0.0059646500511091076, "grad_norm": 0.55078125, "learning_rate": 0.001997254129152806, "loss": 0.3199, "step": 3364 }, { "epoch": 0.005968196216418923, "grad_norm": 0.53515625, "learning_rate": 0.0019972494814459864, "loss": 0.2794, "step": 3366 }, { "epoch": 0.0059717423817287375, "grad_norm": 0.416015625, "learning_rate": 0.001997244829815119, "loss": 0.3171, "step": 3368 }, { "epoch": 0.005975288547038553, "grad_norm": 0.34765625, "learning_rate": 0.0019972401742602234, "loss": 0.3427, "step": 3370 }, { "epoch": 0.005978834712348368, "grad_norm": 0.6171875, "learning_rate": 0.001997235514781321, "loss": 0.2825, "step": 3372 }, { "epoch": 0.005982380877658183, "grad_norm": 0.69921875, "learning_rate": 0.0019972308513784313, "loss": 0.2863, "step": 3374 }, { "epoch": 0.005985927042967998, "grad_norm": 0.265625, "learning_rate": 0.001997226184051575, "loss": 0.2753, "step": 3376 }, { "epoch": 0.005989473208277814, "grad_norm": 0.53515625, "learning_rate": 0.0019972215128007727, "loss": 0.2723, "step": 3378 }, { "epoch": 0.005993019373587629, "grad_norm": 2.828125, "learning_rate": 0.0019972168376260445, "loss": 0.4631, "step": 3380 }, { "epoch": 0.005996565538897444, "grad_norm": 2.296875, "learning_rate": 0.0019972121585274116, "loss": 0.4052, "step": 3382 }, { "epoch": 0.006000111704207259, "grad_norm": 0.515625, "learning_rate": 0.0019972074755048932, "loss": 0.3341, "step": 3384 }, { "epoch": 0.006003657869517075, "grad_norm": 0.275390625, "learning_rate": 0.0019972027885585106, "loss": 0.2874, "step": 3386 }, { "epoch": 0.006007204034826889, "grad_norm": 0.39453125, "learning_rate": 0.0019971980976882845, "loss": 0.2982, "step": 3388 }, { "epoch": 0.0060107502001367046, "grad_norm": 0.33984375, "learning_rate": 0.001997193402894235, "loss": 0.2858, "step": 3390 }, { "epoch": 0.00601429636544652, "grad_norm": 0.4140625, "learning_rate": 0.001997188704176382, "loss": 0.2857, "step": 3392 }, { "epoch": 0.006017842530756335, "grad_norm": 0.353515625, "learning_rate": 0.0019971840015347475, "loss": 0.2693, "step": 3394 }, { "epoch": 0.00602138869606615, "grad_norm": 0.45703125, "learning_rate": 0.001997179294969351, "loss": 0.2423, "step": 3396 }, { "epoch": 0.006024934861375965, "grad_norm": 0.416015625, "learning_rate": 0.001997174584480214, "loss": 0.2608, "step": 3398 }, { "epoch": 0.006028481026685781, "grad_norm": 0.27734375, "learning_rate": 0.001997169870067356, "loss": 0.2955, "step": 3400 }, { "epoch": 0.006032027191995595, "grad_norm": 0.91796875, "learning_rate": 0.001997165151730798, "loss": 0.3376, "step": 3402 }, { "epoch": 0.006035573357305411, "grad_norm": 0.306640625, "learning_rate": 0.0019971604294705607, "loss": 0.3282, "step": 3404 }, { "epoch": 0.006039119522615226, "grad_norm": 0.2470703125, "learning_rate": 0.001997155703286665, "loss": 0.2785, "step": 3406 }, { "epoch": 0.006042665687925041, "grad_norm": 0.396484375, "learning_rate": 0.0019971509731791315, "loss": 0.3063, "step": 3408 }, { "epoch": 0.006046211853234856, "grad_norm": 0.38671875, "learning_rate": 0.0019971462391479805, "loss": 0.2914, "step": 3410 }, { "epoch": 0.006049758018544672, "grad_norm": 2.375, "learning_rate": 0.001997141501193233, "loss": 0.5058, "step": 3412 }, { "epoch": 0.006053304183854487, "grad_norm": 0.4296875, "learning_rate": 0.00199713675931491, "loss": 0.3155, "step": 3414 }, { "epoch": 0.0060568503491643015, "grad_norm": 0.333984375, "learning_rate": 0.001997132013513032, "loss": 0.3373, "step": 3416 }, { "epoch": 0.006060396514474117, "grad_norm": 0.359375, "learning_rate": 0.0019971272637876194, "loss": 0.3214, "step": 3418 }, { "epoch": 0.006063942679783932, "grad_norm": 0.375, "learning_rate": 0.001997122510138693, "loss": 0.4549, "step": 3420 }, { "epoch": 0.006067488845093747, "grad_norm": 0.7890625, "learning_rate": 0.0019971177525662746, "loss": 0.3052, "step": 3422 }, { "epoch": 0.006071035010403562, "grad_norm": 0.451171875, "learning_rate": 0.0019971129910703834, "loss": 0.3794, "step": 3424 }, { "epoch": 0.006074581175713378, "grad_norm": 0.5625, "learning_rate": 0.0019971082256510417, "loss": 0.2553, "step": 3426 }, { "epoch": 0.006078127341023193, "grad_norm": 0.82421875, "learning_rate": 0.00199710345630827, "loss": 0.2827, "step": 3428 }, { "epoch": 0.006081673506333008, "grad_norm": 0.51953125, "learning_rate": 0.0019970986830420883, "loss": 0.2815, "step": 3430 }, { "epoch": 0.006085219671642823, "grad_norm": 0.322265625, "learning_rate": 0.001997093905852518, "loss": 0.2938, "step": 3432 }, { "epoch": 0.006088765836952639, "grad_norm": 1.4375, "learning_rate": 0.0019970891247395803, "loss": 0.2535, "step": 3434 }, { "epoch": 0.006092312002262453, "grad_norm": 0.33984375, "learning_rate": 0.0019970843397032955, "loss": 0.2621, "step": 3436 }, { "epoch": 0.006095858167572269, "grad_norm": 0.61328125, "learning_rate": 0.0019970795507436856, "loss": 0.3499, "step": 3438 }, { "epoch": 0.006099404332882084, "grad_norm": 0.828125, "learning_rate": 0.0019970747578607704, "loss": 0.3129, "step": 3440 }, { "epoch": 0.0061029504981918985, "grad_norm": 0.369140625, "learning_rate": 0.001997069961054571, "loss": 0.3029, "step": 3442 }, { "epoch": 0.006106496663501714, "grad_norm": 0.333984375, "learning_rate": 0.001997065160325109, "loss": 0.2923, "step": 3444 }, { "epoch": 0.006110042828811529, "grad_norm": 0.65625, "learning_rate": 0.0019970603556724053, "loss": 0.3006, "step": 3446 }, { "epoch": 0.006113588994121345, "grad_norm": 0.66015625, "learning_rate": 0.0019970555470964803, "loss": 0.3404, "step": 3448 }, { "epoch": 0.006117135159431159, "grad_norm": 0.48828125, "learning_rate": 0.001997050734597356, "loss": 0.2949, "step": 3450 }, { "epoch": 0.006120681324740975, "grad_norm": 0.640625, "learning_rate": 0.001997045918175052, "loss": 0.3468, "step": 3452 }, { "epoch": 0.00612422749005079, "grad_norm": 1.78125, "learning_rate": 0.001997041097829591, "loss": 0.4974, "step": 3454 }, { "epoch": 0.006127773655360605, "grad_norm": 0.318359375, "learning_rate": 0.001997036273560992, "loss": 0.2368, "step": 3456 }, { "epoch": 0.00613131982067042, "grad_norm": 0.9296875, "learning_rate": 0.001997031445369279, "loss": 0.3351, "step": 3458 }, { "epoch": 0.006134865985980236, "grad_norm": 1.40625, "learning_rate": 0.0019970266132544705, "loss": 0.2688, "step": 3460 }, { "epoch": 0.006138412151290051, "grad_norm": 0.248046875, "learning_rate": 0.001997021777216589, "loss": 0.2481, "step": 3462 }, { "epoch": 0.0061419583165998656, "grad_norm": 1.0703125, "learning_rate": 0.0019970169372556554, "loss": 0.3255, "step": 3464 }, { "epoch": 0.006145504481909681, "grad_norm": 0.46875, "learning_rate": 0.001997012093371691, "loss": 0.3865, "step": 3466 }, { "epoch": 0.006149050647219496, "grad_norm": 0.380859375, "learning_rate": 0.001997007245564716, "loss": 0.2771, "step": 3468 }, { "epoch": 0.006152596812529311, "grad_norm": 9.25, "learning_rate": 0.001997002393834753, "loss": 0.3283, "step": 3470 }, { "epoch": 0.006156142977839126, "grad_norm": 0.51953125, "learning_rate": 0.001996997538181822, "loss": 0.3164, "step": 3472 }, { "epoch": 0.006159689143148942, "grad_norm": 0.44140625, "learning_rate": 0.0019969926786059453, "loss": 0.3194, "step": 3474 }, { "epoch": 0.006163235308458756, "grad_norm": 1.25, "learning_rate": 0.0019969878151071432, "loss": 0.2693, "step": 3476 }, { "epoch": 0.006166781473768572, "grad_norm": 0.35546875, "learning_rate": 0.001996982947685438, "loss": 0.2238, "step": 3478 }, { "epoch": 0.006170327639078387, "grad_norm": 0.8125, "learning_rate": 0.00199697807634085, "loss": 0.3017, "step": 3480 }, { "epoch": 0.006173873804388203, "grad_norm": 1.8203125, "learning_rate": 0.001996973201073401, "loss": 0.3029, "step": 3482 }, { "epoch": 0.006177419969698017, "grad_norm": 0.26953125, "learning_rate": 0.001996968321883112, "loss": 0.3359, "step": 3484 }, { "epoch": 0.006180966135007833, "grad_norm": 0.6328125, "learning_rate": 0.001996963438770005, "loss": 0.2878, "step": 3486 }, { "epoch": 0.006184512300317648, "grad_norm": 0.875, "learning_rate": 0.0019969585517341007, "loss": 0.321, "step": 3488 }, { "epoch": 0.0061880584656274626, "grad_norm": 0.40234375, "learning_rate": 0.0019969536607754215, "loss": 0.2913, "step": 3490 }, { "epoch": 0.006191604630937278, "grad_norm": 0.34765625, "learning_rate": 0.0019969487658939872, "loss": 0.2863, "step": 3492 }, { "epoch": 0.006195150796247093, "grad_norm": 0.77734375, "learning_rate": 0.00199694386708982, "loss": 0.5539, "step": 3494 }, { "epoch": 0.006198696961556909, "grad_norm": 0.56640625, "learning_rate": 0.0019969389643629413, "loss": 0.2408, "step": 3496 }, { "epoch": 0.006202243126866723, "grad_norm": 0.515625, "learning_rate": 0.001996934057713373, "loss": 0.2953, "step": 3498 }, { "epoch": 0.006205789292176539, "grad_norm": 0.66015625, "learning_rate": 0.0019969291471411354, "loss": 0.3432, "step": 3500 }, { "epoch": 0.006209335457486354, "grad_norm": 0.357421875, "learning_rate": 0.0019969242326462514, "loss": 0.342, "step": 3502 }, { "epoch": 0.006212881622796169, "grad_norm": 0.326171875, "learning_rate": 0.0019969193142287418, "loss": 0.2239, "step": 3504 }, { "epoch": 0.006216427788105984, "grad_norm": 0.25390625, "learning_rate": 0.0019969143918886277, "loss": 0.2682, "step": 3506 }, { "epoch": 0.0062199739534158, "grad_norm": 0.53125, "learning_rate": 0.0019969094656259313, "loss": 0.3301, "step": 3508 }, { "epoch": 0.006223520118725614, "grad_norm": 0.51953125, "learning_rate": 0.0019969045354406734, "loss": 0.2506, "step": 3510 }, { "epoch": 0.00622706628403543, "grad_norm": 0.49609375, "learning_rate": 0.001996899601332877, "loss": 0.344, "step": 3512 }, { "epoch": 0.006230612449345245, "grad_norm": 0.55859375, "learning_rate": 0.001996894663302562, "loss": 0.3055, "step": 3514 }, { "epoch": 0.00623415861465506, "grad_norm": 0.70703125, "learning_rate": 0.0019968897213497507, "loss": 0.3131, "step": 3516 }, { "epoch": 0.006237704779964875, "grad_norm": 2.0625, "learning_rate": 0.001996884775474465, "loss": 0.298, "step": 3518 }, { "epoch": 0.00624125094527469, "grad_norm": 0.60546875, "learning_rate": 0.0019968798256767262, "loss": 0.2768, "step": 3520 }, { "epoch": 0.006244797110584506, "grad_norm": 76.0, "learning_rate": 0.001996874871956556, "loss": 0.4224, "step": 3522 }, { "epoch": 0.00624834327589432, "grad_norm": 0.51953125, "learning_rate": 0.001996869914313976, "loss": 0.2167, "step": 3524 }, { "epoch": 0.006251889441204136, "grad_norm": 0.3125, "learning_rate": 0.0019968649527490083, "loss": 0.2859, "step": 3526 }, { "epoch": 0.006255435606513951, "grad_norm": 0.4765625, "learning_rate": 0.001996859987261674, "loss": 0.2549, "step": 3528 }, { "epoch": 0.006258981771823767, "grad_norm": 1.546875, "learning_rate": 0.001996855017851995, "loss": 0.3599, "step": 3530 }, { "epoch": 0.006262527937133581, "grad_norm": 0.29296875, "learning_rate": 0.0019968500445199933, "loss": 0.2769, "step": 3532 }, { "epoch": 0.006266074102443397, "grad_norm": 0.234375, "learning_rate": 0.0019968450672656905, "loss": 0.3375, "step": 3534 }, { "epoch": 0.006269620267753212, "grad_norm": 0.6484375, "learning_rate": 0.0019968400860891082, "loss": 0.307, "step": 3536 }, { "epoch": 0.006273166433063027, "grad_norm": 0.5078125, "learning_rate": 0.001996835100990268, "loss": 0.4704, "step": 3538 }, { "epoch": 0.006276712598372842, "grad_norm": 0.78125, "learning_rate": 0.0019968301119691924, "loss": 0.2594, "step": 3540 }, { "epoch": 0.006280258763682657, "grad_norm": 0.51171875, "learning_rate": 0.0019968251190259027, "loss": 0.2916, "step": 3542 }, { "epoch": 0.006283804928992472, "grad_norm": 1.671875, "learning_rate": 0.001996820122160421, "loss": 0.664, "step": 3544 }, { "epoch": 0.006287351094302287, "grad_norm": 2.078125, "learning_rate": 0.001996815121372769, "loss": 0.304, "step": 3546 }, { "epoch": 0.006290897259612103, "grad_norm": 0.57421875, "learning_rate": 0.0019968101166629683, "loss": 0.2742, "step": 3548 }, { "epoch": 0.006294443424921918, "grad_norm": 15.6875, "learning_rate": 0.001996805108031041, "loss": 0.3182, "step": 3550 }, { "epoch": 0.006297989590231733, "grad_norm": 1.6875, "learning_rate": 0.00199680009547701, "loss": 0.4785, "step": 3552 }, { "epoch": 0.006301535755541548, "grad_norm": 2.375, "learning_rate": 0.0019967950790008952, "loss": 0.4236, "step": 3554 }, { "epoch": 0.006305081920851364, "grad_norm": 0.5390625, "learning_rate": 0.0019967900586027204, "loss": 0.2976, "step": 3556 }, { "epoch": 0.006308628086161178, "grad_norm": 2.90625, "learning_rate": 0.0019967850342825066, "loss": 0.3757, "step": 3558 }, { "epoch": 0.006312174251470994, "grad_norm": 0.328125, "learning_rate": 0.0019967800060402756, "loss": 0.1831, "step": 3560 }, { "epoch": 0.006315720416780809, "grad_norm": 0.56640625, "learning_rate": 0.0019967749738760503, "loss": 0.2324, "step": 3562 }, { "epoch": 0.006319266582090624, "grad_norm": 0.3671875, "learning_rate": 0.0019967699377898517, "loss": 0.3373, "step": 3564 }, { "epoch": 0.006322812747400439, "grad_norm": 1.984375, "learning_rate": 0.0019967648977817025, "loss": 0.2453, "step": 3566 }, { "epoch": 0.006326358912710254, "grad_norm": 0.416015625, "learning_rate": 0.0019967598538516247, "loss": 0.3425, "step": 3568 }, { "epoch": 0.00632990507802007, "grad_norm": 0.40234375, "learning_rate": 0.00199675480599964, "loss": 0.2809, "step": 3570 }, { "epoch": 0.006333451243329884, "grad_norm": 1.3203125, "learning_rate": 0.0019967497542257707, "loss": 0.3548, "step": 3572 }, { "epoch": 0.0063369974086397, "grad_norm": 0.478515625, "learning_rate": 0.001996744698530039, "loss": 0.3291, "step": 3574 }, { "epoch": 0.006340543573949515, "grad_norm": 0.50390625, "learning_rate": 0.0019967396389124667, "loss": 0.2642, "step": 3576 }, { "epoch": 0.00634408973925933, "grad_norm": 0.8828125, "learning_rate": 0.001996734575373076, "loss": 0.2874, "step": 3578 }, { "epoch": 0.006347635904569145, "grad_norm": 0.43359375, "learning_rate": 0.0019967295079118897, "loss": 0.3389, "step": 3580 }, { "epoch": 0.006351182069878961, "grad_norm": 0.87109375, "learning_rate": 0.0019967244365289294, "loss": 0.5039, "step": 3582 }, { "epoch": 0.006354728235188776, "grad_norm": 1.2421875, "learning_rate": 0.001996719361224217, "loss": 0.3724, "step": 3584 }, { "epoch": 0.006358274400498591, "grad_norm": 4.96875, "learning_rate": 0.0019967142819977742, "loss": 0.7329, "step": 3586 }, { "epoch": 0.006361820565808406, "grad_norm": 0.357421875, "learning_rate": 0.0019967091988496253, "loss": 0.2947, "step": 3588 }, { "epoch": 0.006365366731118221, "grad_norm": 0.373046875, "learning_rate": 0.0019967041117797905, "loss": 0.5445, "step": 3590 }, { "epoch": 0.006368912896428036, "grad_norm": 0.58984375, "learning_rate": 0.001996699020788293, "loss": 0.2339, "step": 3592 }, { "epoch": 0.006372459061737851, "grad_norm": 0.490234375, "learning_rate": 0.0019966939258751547, "loss": 0.3004, "step": 3594 }, { "epoch": 0.006376005227047667, "grad_norm": 2.234375, "learning_rate": 0.0019966888270403983, "loss": 0.3147, "step": 3596 }, { "epoch": 0.006379551392357482, "grad_norm": 1.2578125, "learning_rate": 0.0019966837242840455, "loss": 0.3833, "step": 3598 }, { "epoch": 0.006383097557667297, "grad_norm": 0.73828125, "learning_rate": 0.0019966786176061195, "loss": 0.4519, "step": 3600 }, { "epoch": 0.006386643722977112, "grad_norm": 0.333984375, "learning_rate": 0.0019966735070066416, "loss": 0.2226, "step": 3602 }, { "epoch": 0.006390189888286928, "grad_norm": 0.70703125, "learning_rate": 0.0019966683924856348, "loss": 0.2774, "step": 3604 }, { "epoch": 0.006393736053596742, "grad_norm": 0.76171875, "learning_rate": 0.001996663274043121, "loss": 0.3601, "step": 3606 }, { "epoch": 0.006397282218906558, "grad_norm": 0.498046875, "learning_rate": 0.001996658151679123, "loss": 0.2697, "step": 3608 }, { "epoch": 0.006400828384216373, "grad_norm": 0.349609375, "learning_rate": 0.0019966530253936634, "loss": 0.2523, "step": 3610 }, { "epoch": 0.006404374549526188, "grad_norm": 2.875, "learning_rate": 0.001996647895186764, "loss": 0.3261, "step": 3612 }, { "epoch": 0.006407920714836003, "grad_norm": 0.478515625, "learning_rate": 0.001996642761058448, "loss": 0.2746, "step": 3614 }, { "epoch": 0.006411466880145818, "grad_norm": 0.27734375, "learning_rate": 0.001996637623008737, "loss": 0.2738, "step": 3616 }, { "epoch": 0.006415013045455634, "grad_norm": 1.1484375, "learning_rate": 0.0019966324810376536, "loss": 0.3113, "step": 3618 }, { "epoch": 0.006418559210765448, "grad_norm": 0.90234375, "learning_rate": 0.001996627335145221, "loss": 0.2843, "step": 3620 }, { "epoch": 0.006422105376075264, "grad_norm": 0.96875, "learning_rate": 0.001996622185331461, "loss": 0.2946, "step": 3622 }, { "epoch": 0.006425651541385079, "grad_norm": 1.734375, "learning_rate": 0.0019966170315963965, "loss": 0.3485, "step": 3624 }, { "epoch": 0.006429197706694894, "grad_norm": 2.046875, "learning_rate": 0.0019966118739400502, "loss": 0.4324, "step": 3626 }, { "epoch": 0.006432743872004709, "grad_norm": 0.4453125, "learning_rate": 0.001996606712362444, "loss": 0.2174, "step": 3628 }, { "epoch": 0.006436290037314525, "grad_norm": 1.5390625, "learning_rate": 0.001996601546863601, "loss": 0.3234, "step": 3630 }, { "epoch": 0.00643983620262434, "grad_norm": 0.451171875, "learning_rate": 0.0019965963774435437, "loss": 0.3029, "step": 3632 }, { "epoch": 0.006443382367934155, "grad_norm": 0.82421875, "learning_rate": 0.0019965912041022943, "loss": 0.2654, "step": 3634 }, { "epoch": 0.00644692853324397, "grad_norm": 0.53515625, "learning_rate": 0.001996586026839876, "loss": 0.2686, "step": 3636 }, { "epoch": 0.0064504746985537854, "grad_norm": 1.1796875, "learning_rate": 0.0019965808456563114, "loss": 0.2771, "step": 3638 }, { "epoch": 0.0064540208638636, "grad_norm": 0.73046875, "learning_rate": 0.001996575660551623, "loss": 0.415, "step": 3640 }, { "epoch": 0.006457567029173415, "grad_norm": 0.66015625, "learning_rate": 0.001996570471525833, "loss": 0.4476, "step": 3642 }, { "epoch": 0.006461113194483231, "grad_norm": 2.5, "learning_rate": 0.001996565278578965, "loss": 0.495, "step": 3644 }, { "epoch": 0.006464659359793045, "grad_norm": 0.55859375, "learning_rate": 0.001996560081711041, "loss": 0.2976, "step": 3646 }, { "epoch": 0.006468205525102861, "grad_norm": 1.0, "learning_rate": 0.001996554880922084, "loss": 0.2428, "step": 3648 }, { "epoch": 0.006471751690412676, "grad_norm": 0.90234375, "learning_rate": 0.001996549676212117, "loss": 0.5618, "step": 3650 }, { "epoch": 0.006475297855722492, "grad_norm": 0.396484375, "learning_rate": 0.0019965444675811624, "loss": 0.2521, "step": 3652 }, { "epoch": 0.006478844021032306, "grad_norm": 1.234375, "learning_rate": 0.001996539255029243, "loss": 0.3476, "step": 3654 }, { "epoch": 0.006482390186342122, "grad_norm": 0.625, "learning_rate": 0.0019965340385563815, "loss": 0.2919, "step": 3656 }, { "epoch": 0.006485936351651937, "grad_norm": 0.83203125, "learning_rate": 0.001996528818162601, "loss": 0.37, "step": 3658 }, { "epoch": 0.006489482516961752, "grad_norm": 0.578125, "learning_rate": 0.001996523593847924, "loss": 0.3794, "step": 3660 }, { "epoch": 0.006493028682271567, "grad_norm": 0.453125, "learning_rate": 0.001996518365612374, "loss": 0.2594, "step": 3662 }, { "epoch": 0.0064965748475813824, "grad_norm": 0.46484375, "learning_rate": 0.0019965131334559734, "loss": 0.4004, "step": 3664 }, { "epoch": 0.006500121012891198, "grad_norm": 0.7734375, "learning_rate": 0.001996507897378745, "loss": 0.321, "step": 3666 }, { "epoch": 0.006503667178201012, "grad_norm": 0.69140625, "learning_rate": 0.0019965026573807113, "loss": 0.3256, "step": 3668 }, { "epoch": 0.006507213343510828, "grad_norm": 0.5234375, "learning_rate": 0.001996497413461896, "loss": 0.2443, "step": 3670 }, { "epoch": 0.006510759508820643, "grad_norm": 1.6484375, "learning_rate": 0.0019964921656223226, "loss": 0.5052, "step": 3672 }, { "epoch": 0.006514305674130458, "grad_norm": 0.4140625, "learning_rate": 0.001996486913862012, "loss": 0.3542, "step": 3674 }, { "epoch": 0.006517851839440273, "grad_norm": 0.365234375, "learning_rate": 0.0019964816581809893, "loss": 0.2876, "step": 3676 }, { "epoch": 0.006521398004750089, "grad_norm": 0.404296875, "learning_rate": 0.0019964763985792764, "loss": 0.3216, "step": 3678 }, { "epoch": 0.006524944170059903, "grad_norm": 0.8515625, "learning_rate": 0.0019964711350568963, "loss": 0.3296, "step": 3680 }, { "epoch": 0.006528490335369719, "grad_norm": 0.287109375, "learning_rate": 0.001996465867613872, "loss": 0.2429, "step": 3682 }, { "epoch": 0.006532036500679534, "grad_norm": 0.71484375, "learning_rate": 0.001996460596250227, "loss": 0.2976, "step": 3684 }, { "epoch": 0.0065355826659893495, "grad_norm": 0.35546875, "learning_rate": 0.0019964553209659837, "loss": 0.247, "step": 3686 }, { "epoch": 0.006539128831299164, "grad_norm": 0.31640625, "learning_rate": 0.0019964500417611656, "loss": 0.2985, "step": 3688 }, { "epoch": 0.006542674996608979, "grad_norm": 0.427734375, "learning_rate": 0.001996444758635796, "loss": 0.3057, "step": 3690 }, { "epoch": 0.006546221161918795, "grad_norm": 1.859375, "learning_rate": 0.001996439471589898, "loss": 0.2865, "step": 3692 }, { "epoch": 0.006549767327228609, "grad_norm": 1.1796875, "learning_rate": 0.001996434180623494, "loss": 0.3899, "step": 3694 }, { "epoch": 0.006553313492538425, "grad_norm": 1.375, "learning_rate": 0.001996428885736608, "loss": 0.5637, "step": 3696 }, { "epoch": 0.00655685965784824, "grad_norm": 0.5234375, "learning_rate": 0.0019964235869292623, "loss": 0.331, "step": 3698 }, { "epoch": 0.006560405823158056, "grad_norm": 0.81640625, "learning_rate": 0.0019964182842014807, "loss": 0.2936, "step": 3700 }, { "epoch": 0.00656395198846787, "grad_norm": 0.46875, "learning_rate": 0.0019964129775532865, "loss": 0.2663, "step": 3702 }, { "epoch": 0.006567498153777686, "grad_norm": 0.98828125, "learning_rate": 0.0019964076669847022, "loss": 0.3022, "step": 3704 }, { "epoch": 0.006571044319087501, "grad_norm": 1.46875, "learning_rate": 0.0019964023524957518, "loss": 0.3109, "step": 3706 }, { "epoch": 0.006574590484397316, "grad_norm": 0.234375, "learning_rate": 0.001996397034086458, "loss": 0.3486, "step": 3708 }, { "epoch": 0.006578136649707131, "grad_norm": 0.369140625, "learning_rate": 0.001996391711756844, "loss": 0.3987, "step": 3710 }, { "epoch": 0.0065816828150169465, "grad_norm": 1.265625, "learning_rate": 0.001996386385506934, "loss": 0.3227, "step": 3712 }, { "epoch": 0.006585228980326761, "grad_norm": 0.36328125, "learning_rate": 0.00199638105533675, "loss": 0.3167, "step": 3714 }, { "epoch": 0.006588775145636576, "grad_norm": 0.494140625, "learning_rate": 0.0019963757212463165, "loss": 0.2572, "step": 3716 }, { "epoch": 0.006592321310946392, "grad_norm": 0.953125, "learning_rate": 0.0019963703832356562, "loss": 0.5886, "step": 3718 }, { "epoch": 0.006595867476256207, "grad_norm": 0.349609375, "learning_rate": 0.0019963650413047924, "loss": 0.2266, "step": 3720 }, { "epoch": 0.006599413641566022, "grad_norm": 1.40625, "learning_rate": 0.0019963596954537485, "loss": 0.4081, "step": 3722 }, { "epoch": 0.006602959806875837, "grad_norm": 0.5546875, "learning_rate": 0.001996354345682548, "loss": 0.2733, "step": 3724 }, { "epoch": 0.006606505972185653, "grad_norm": 0.96484375, "learning_rate": 0.0019963489919912142, "loss": 0.2669, "step": 3726 }, { "epoch": 0.006610052137495467, "grad_norm": 0.400390625, "learning_rate": 0.0019963436343797703, "loss": 0.2175, "step": 3728 }, { "epoch": 0.006613598302805283, "grad_norm": 0.251953125, "learning_rate": 0.001996338272848241, "loss": 0.2582, "step": 3730 }, { "epoch": 0.006617144468115098, "grad_norm": 0.359375, "learning_rate": 0.001996332907396648, "loss": 0.2588, "step": 3732 }, { "epoch": 0.0066206906334249135, "grad_norm": 0.412109375, "learning_rate": 0.001996327538025015, "loss": 0.2742, "step": 3734 }, { "epoch": 0.006624236798734728, "grad_norm": 0.55859375, "learning_rate": 0.0019963221647333667, "loss": 0.2768, "step": 3736 }, { "epoch": 0.0066277829640445434, "grad_norm": 0.361328125, "learning_rate": 0.001996316787521726, "loss": 0.2891, "step": 3738 }, { "epoch": 0.006631329129354359, "grad_norm": 0.765625, "learning_rate": 0.001996311406390116, "loss": 0.3709, "step": 3740 }, { "epoch": 0.006634875294664173, "grad_norm": 2.40625, "learning_rate": 0.0019963060213385605, "loss": 0.2232, "step": 3742 }, { "epoch": 0.006638421459973989, "grad_norm": 0.890625, "learning_rate": 0.0019963006323670835, "loss": 0.2206, "step": 3744 }, { "epoch": 0.006641967625283804, "grad_norm": 0.306640625, "learning_rate": 0.001996295239475708, "loss": 0.3767, "step": 3746 }, { "epoch": 0.006645513790593619, "grad_norm": 3.140625, "learning_rate": 0.0019962898426644574, "loss": 0.4518, "step": 3748 }, { "epoch": 0.006649059955903434, "grad_norm": 1.15625, "learning_rate": 0.001996284441933356, "loss": 0.368, "step": 3750 }, { "epoch": 0.00665260612121325, "grad_norm": 0.55859375, "learning_rate": 0.001996279037282427, "loss": 0.3004, "step": 3752 }, { "epoch": 0.006656152286523065, "grad_norm": 0.6640625, "learning_rate": 0.0019962736287116936, "loss": 0.3034, "step": 3754 }, { "epoch": 0.00665969845183288, "grad_norm": 0.6953125, "learning_rate": 0.001996268216221181, "loss": 0.4052, "step": 3756 }, { "epoch": 0.006663244617142695, "grad_norm": 0.59765625, "learning_rate": 0.0019962627998109115, "loss": 0.2305, "step": 3758 }, { "epoch": 0.0066667907824525105, "grad_norm": 1.765625, "learning_rate": 0.0019962573794809085, "loss": 0.3102, "step": 3760 }, { "epoch": 0.006670336947762325, "grad_norm": 0.87890625, "learning_rate": 0.0019962519552311968, "loss": 0.3272, "step": 3762 }, { "epoch": 0.0066738831130721404, "grad_norm": 0.7265625, "learning_rate": 0.0019962465270617997, "loss": 0.3258, "step": 3764 }, { "epoch": 0.006677429278381956, "grad_norm": 0.80078125, "learning_rate": 0.0019962410949727408, "loss": 0.2817, "step": 3766 }, { "epoch": 0.006680975443691771, "grad_norm": 0.3984375, "learning_rate": 0.0019962356589640438, "loss": 0.3349, "step": 3768 }, { "epoch": 0.006684521609001586, "grad_norm": 0.80859375, "learning_rate": 0.001996230219035733, "loss": 0.292, "step": 3770 }, { "epoch": 0.006688067774311401, "grad_norm": 0.77734375, "learning_rate": 0.0019962247751878315, "loss": 0.3389, "step": 3772 }, { "epoch": 0.006691613939621217, "grad_norm": 0.515625, "learning_rate": 0.001996219327420364, "loss": 0.3023, "step": 3774 }, { "epoch": 0.006695160104931031, "grad_norm": 0.80859375, "learning_rate": 0.0019962138757333527, "loss": 0.3233, "step": 3776 }, { "epoch": 0.006698706270240847, "grad_norm": 0.61328125, "learning_rate": 0.0019962084201268233, "loss": 0.3144, "step": 3778 }, { "epoch": 0.006702252435550662, "grad_norm": 0.478515625, "learning_rate": 0.0019962029606007984, "loss": 0.2663, "step": 3780 }, { "epoch": 0.006705798600860477, "grad_norm": 0.59375, "learning_rate": 0.0019961974971553025, "loss": 0.2852, "step": 3782 }, { "epoch": 0.006709344766170292, "grad_norm": 0.435546875, "learning_rate": 0.0019961920297903593, "loss": 0.2436, "step": 3784 }, { "epoch": 0.0067128909314801075, "grad_norm": 0.328125, "learning_rate": 0.001996186558505993, "loss": 0.4178, "step": 3786 }, { "epoch": 0.006716437096789923, "grad_norm": 0.373046875, "learning_rate": 0.0019961810833022267, "loss": 0.3111, "step": 3788 }, { "epoch": 0.006719983262099737, "grad_norm": 0.48046875, "learning_rate": 0.0019961756041790854, "loss": 0.2805, "step": 3790 }, { "epoch": 0.006723529427409553, "grad_norm": 0.3984375, "learning_rate": 0.0019961701211365927, "loss": 0.3611, "step": 3792 }, { "epoch": 0.006727075592719368, "grad_norm": 0.302734375, "learning_rate": 0.001996164634174772, "loss": 0.2097, "step": 3794 }, { "epoch": 0.006730621758029183, "grad_norm": 0.515625, "learning_rate": 0.0019961591432936477, "loss": 0.3033, "step": 3796 }, { "epoch": 0.006734167923338998, "grad_norm": 7.34375, "learning_rate": 0.0019961536484932444, "loss": 0.2526, "step": 3798 }, { "epoch": 0.006737714088648814, "grad_norm": 1.234375, "learning_rate": 0.001996148149773585, "loss": 0.2675, "step": 3800 }, { "epoch": 0.006741260253958629, "grad_norm": 0.462890625, "learning_rate": 0.0019961426471346946, "loss": 0.3236, "step": 3802 }, { "epoch": 0.006744806419268444, "grad_norm": 0.322265625, "learning_rate": 0.0019961371405765966, "loss": 0.2906, "step": 3804 }, { "epoch": 0.006748352584578259, "grad_norm": 0.59375, "learning_rate": 0.001996131630099315, "loss": 0.3176, "step": 3806 }, { "epoch": 0.0067518987498880745, "grad_norm": 0.373046875, "learning_rate": 0.0019961261157028748, "loss": 0.354, "step": 3808 }, { "epoch": 0.006755444915197889, "grad_norm": 0.2490234375, "learning_rate": 0.001996120597387299, "loss": 0.3218, "step": 3810 }, { "epoch": 0.0067589910805077045, "grad_norm": 0.5, "learning_rate": 0.001996115075152612, "loss": 0.2484, "step": 3812 }, { "epoch": 0.00676253724581752, "grad_norm": 0.400390625, "learning_rate": 0.001996109548998839, "loss": 0.3758, "step": 3814 }, { "epoch": 0.006766083411127334, "grad_norm": 0.72265625, "learning_rate": 0.001996104018926003, "loss": 0.1925, "step": 3816 }, { "epoch": 0.00676962957643715, "grad_norm": 1.859375, "learning_rate": 0.0019960984849341284, "loss": 0.3434, "step": 3818 }, { "epoch": 0.006773175741746965, "grad_norm": 0.81640625, "learning_rate": 0.00199609294702324, "loss": 0.2872, "step": 3820 }, { "epoch": 0.006776721907056781, "grad_norm": 0.416015625, "learning_rate": 0.0019960874051933608, "loss": 0.2461, "step": 3822 }, { "epoch": 0.006780268072366595, "grad_norm": 0.40625, "learning_rate": 0.0019960818594445162, "loss": 0.2654, "step": 3824 }, { "epoch": 0.006783814237676411, "grad_norm": 1.7421875, "learning_rate": 0.00199607630977673, "loss": 0.3872, "step": 3826 }, { "epoch": 0.006787360402986226, "grad_norm": 0.357421875, "learning_rate": 0.001996070756190027, "loss": 0.2594, "step": 3828 }, { "epoch": 0.006790906568296041, "grad_norm": 0.54296875, "learning_rate": 0.0019960651986844304, "loss": 0.2353, "step": 3830 }, { "epoch": 0.006794452733605856, "grad_norm": 0.30859375, "learning_rate": 0.001996059637259965, "loss": 0.3036, "step": 3832 }, { "epoch": 0.0067979988989156715, "grad_norm": 0.271484375, "learning_rate": 0.0019960540719166555, "loss": 0.2459, "step": 3834 }, { "epoch": 0.006801545064225487, "grad_norm": 0.59375, "learning_rate": 0.001996048502654526, "loss": 0.3075, "step": 3836 }, { "epoch": 0.0068050912295353014, "grad_norm": 0.34765625, "learning_rate": 0.001996042929473601, "loss": 0.336, "step": 3838 }, { "epoch": 0.006808637394845117, "grad_norm": 0.31640625, "learning_rate": 0.0019960373523739048, "loss": 0.3007, "step": 3840 }, { "epoch": 0.006812183560154932, "grad_norm": 0.3671875, "learning_rate": 0.0019960317713554614, "loss": 0.2348, "step": 3842 }, { "epoch": 0.006815729725464747, "grad_norm": 1.15625, "learning_rate": 0.0019960261864182954, "loss": 0.3357, "step": 3844 }, { "epoch": 0.006819275890774562, "grad_norm": 0.341796875, "learning_rate": 0.0019960205975624317, "loss": 0.3053, "step": 3846 }, { "epoch": 0.006822822056084378, "grad_norm": 0.44921875, "learning_rate": 0.001996015004787894, "loss": 0.265, "step": 3848 }, { "epoch": 0.006826368221394192, "grad_norm": 0.27734375, "learning_rate": 0.0019960094080947077, "loss": 0.3206, "step": 3850 }, { "epoch": 0.006829914386704008, "grad_norm": 0.333984375, "learning_rate": 0.0019960038074828958, "loss": 0.2849, "step": 3852 }, { "epoch": 0.006833460552013823, "grad_norm": 0.5546875, "learning_rate": 0.0019959982029524844, "loss": 0.2584, "step": 3854 }, { "epoch": 0.0068370067173236385, "grad_norm": 0.341796875, "learning_rate": 0.0019959925945034975, "loss": 0.3027, "step": 3856 }, { "epoch": 0.006840552882633453, "grad_norm": 0.337890625, "learning_rate": 0.001995986982135959, "loss": 0.2491, "step": 3858 }, { "epoch": 0.0068440990479432685, "grad_norm": 1.484375, "learning_rate": 0.0019959813658498938, "loss": 0.6935, "step": 3860 }, { "epoch": 0.006847645213253084, "grad_norm": 0.75, "learning_rate": 0.001995975745645327, "loss": 0.3274, "step": 3862 }, { "epoch": 0.0068511913785628984, "grad_norm": 0.578125, "learning_rate": 0.0019959701215222824, "loss": 0.2932, "step": 3864 }, { "epoch": 0.006854737543872714, "grad_norm": 1.0859375, "learning_rate": 0.001995964493480785, "loss": 0.3154, "step": 3866 }, { "epoch": 0.006858283709182529, "grad_norm": 0.5625, "learning_rate": 0.0019959588615208594, "loss": 0.3343, "step": 3868 }, { "epoch": 0.006861829874492345, "grad_norm": 0.515625, "learning_rate": 0.00199595322564253, "loss": 0.3474, "step": 3870 }, { "epoch": 0.006865376039802159, "grad_norm": 0.5078125, "learning_rate": 0.001995947585845822, "loss": 0.3114, "step": 3872 }, { "epoch": 0.006868922205111975, "grad_norm": 2.171875, "learning_rate": 0.001995941942130759, "loss": 0.4198, "step": 3874 }, { "epoch": 0.00687246837042179, "grad_norm": 0.3671875, "learning_rate": 0.0019959362944973673, "loss": 0.2397, "step": 3876 }, { "epoch": 0.006876014535731605, "grad_norm": 0.255859375, "learning_rate": 0.0019959306429456697, "loss": 0.2356, "step": 3878 }, { "epoch": 0.00687956070104142, "grad_norm": 1.015625, "learning_rate": 0.0019959249874756924, "loss": 0.2489, "step": 3880 }, { "epoch": 0.0068831068663512355, "grad_norm": 2.359375, "learning_rate": 0.0019959193280874596, "loss": 0.4578, "step": 3882 }, { "epoch": 0.00688665303166105, "grad_norm": 0.78515625, "learning_rate": 0.0019959136647809965, "loss": 0.2947, "step": 3884 }, { "epoch": 0.0068901991969708655, "grad_norm": 0.515625, "learning_rate": 0.001995907997556327, "loss": 0.3085, "step": 3886 }, { "epoch": 0.006893745362280681, "grad_norm": 2.15625, "learning_rate": 0.001995902326413476, "loss": 0.3363, "step": 3888 }, { "epoch": 0.006897291527590496, "grad_norm": 0.55859375, "learning_rate": 0.001995896651352469, "loss": 0.2899, "step": 3890 }, { "epoch": 0.006900837692900311, "grad_norm": 0.423828125, "learning_rate": 0.0019958909723733305, "loss": 0.267, "step": 3892 }, { "epoch": 0.006904383858210126, "grad_norm": 0.9375, "learning_rate": 0.0019958852894760852, "loss": 0.3345, "step": 3894 }, { "epoch": 0.006907930023519942, "grad_norm": 0.47265625, "learning_rate": 0.001995879602660758, "loss": 0.3044, "step": 3896 }, { "epoch": 0.006911476188829756, "grad_norm": 0.275390625, "learning_rate": 0.001995873911927374, "loss": 0.2471, "step": 3898 }, { "epoch": 0.006915022354139572, "grad_norm": 0.57421875, "learning_rate": 0.0019958682172759577, "loss": 0.2842, "step": 3900 }, { "epoch": 0.006918568519449387, "grad_norm": 0.306640625, "learning_rate": 0.0019958625187065336, "loss": 0.292, "step": 3902 }, { "epoch": 0.0069221146847592025, "grad_norm": 0.416015625, "learning_rate": 0.001995856816219128, "loss": 0.2522, "step": 3904 }, { "epoch": 0.006925660850069017, "grad_norm": 0.2041015625, "learning_rate": 0.0019958511098137647, "loss": 0.2242, "step": 3906 }, { "epoch": 0.0069292070153788325, "grad_norm": 0.5859375, "learning_rate": 0.0019958453994904693, "loss": 0.2738, "step": 3908 }, { "epoch": 0.006932753180688648, "grad_norm": 0.451171875, "learning_rate": 0.0019958396852492667, "loss": 0.2446, "step": 3910 }, { "epoch": 0.0069362993459984625, "grad_norm": 2.59375, "learning_rate": 0.0019958339670901816, "loss": 0.3562, "step": 3912 }, { "epoch": 0.006939845511308278, "grad_norm": 0.83203125, "learning_rate": 0.0019958282450132387, "loss": 0.2546, "step": 3914 }, { "epoch": 0.006943391676618093, "grad_norm": 0.72265625, "learning_rate": 0.0019958225190184636, "loss": 0.2398, "step": 3916 }, { "epoch": 0.006946937841927908, "grad_norm": 0.6796875, "learning_rate": 0.001995816789105881, "loss": 0.3362, "step": 3918 }, { "epoch": 0.006950484007237723, "grad_norm": 0.486328125, "learning_rate": 0.0019958110552755165, "loss": 0.29, "step": 3920 }, { "epoch": 0.006954030172547539, "grad_norm": 0.75, "learning_rate": 0.0019958053175273944, "loss": 0.2743, "step": 3922 }, { "epoch": 0.006957576337857354, "grad_norm": 1.6875, "learning_rate": 0.0019957995758615402, "loss": 0.3565, "step": 3924 }, { "epoch": 0.006961122503167169, "grad_norm": 0.78515625, "learning_rate": 0.0019957938302779792, "loss": 0.2403, "step": 3926 }, { "epoch": 0.006964668668476984, "grad_norm": 1.0, "learning_rate": 0.0019957880807767365, "loss": 0.3427, "step": 3928 }, { "epoch": 0.0069682148337867995, "grad_norm": 0.318359375, "learning_rate": 0.0019957823273578368, "loss": 0.221, "step": 3930 }, { "epoch": 0.006971760999096614, "grad_norm": 0.255859375, "learning_rate": 0.0019957765700213057, "loss": 0.2437, "step": 3932 }, { "epoch": 0.0069753071644064295, "grad_norm": 1.3828125, "learning_rate": 0.001995770808767168, "loss": 0.3686, "step": 3934 }, { "epoch": 0.006978853329716245, "grad_norm": 0.345703125, "learning_rate": 0.001995765043595449, "loss": 0.2674, "step": 3936 }, { "epoch": 0.0069823994950260595, "grad_norm": 2.109375, "learning_rate": 0.0019957592745061745, "loss": 0.3771, "step": 3938 }, { "epoch": 0.006985945660335875, "grad_norm": 0.8515625, "learning_rate": 0.001995753501499369, "loss": 0.2991, "step": 3940 }, { "epoch": 0.00698949182564569, "grad_norm": 0.828125, "learning_rate": 0.001995747724575058, "loss": 0.357, "step": 3942 }, { "epoch": 0.006993037990955506, "grad_norm": 0.265625, "learning_rate": 0.0019957419437332665, "loss": 0.2131, "step": 3944 }, { "epoch": 0.00699658415626532, "grad_norm": 0.251953125, "learning_rate": 0.0019957361589740203, "loss": 0.3052, "step": 3946 }, { "epoch": 0.007000130321575136, "grad_norm": 0.5546875, "learning_rate": 0.0019957303702973442, "loss": 0.2233, "step": 3948 }, { "epoch": 0.007003676486884951, "grad_norm": 0.765625, "learning_rate": 0.001995724577703264, "loss": 0.3408, "step": 3950 }, { "epoch": 0.007007222652194766, "grad_norm": 1.6015625, "learning_rate": 0.001995718781191805, "loss": 0.4903, "step": 3952 }, { "epoch": 0.007010768817504581, "grad_norm": 1.109375, "learning_rate": 0.001995712980762992, "loss": 0.2679, "step": 3954 }, { "epoch": 0.0070143149828143965, "grad_norm": 0.404296875, "learning_rate": 0.001995707176416851, "loss": 0.3888, "step": 3956 }, { "epoch": 0.007017861148124212, "grad_norm": 0.88671875, "learning_rate": 0.001995701368153407, "loss": 0.2263, "step": 3958 }, { "epoch": 0.0070214073134340265, "grad_norm": 3.015625, "learning_rate": 0.0019956955559726854, "loss": 0.3457, "step": 3960 }, { "epoch": 0.007024953478743842, "grad_norm": 0.369140625, "learning_rate": 0.0019956897398747116, "loss": 0.2678, "step": 3962 }, { "epoch": 0.007028499644053657, "grad_norm": 0.427734375, "learning_rate": 0.0019956839198595117, "loss": 0.3978, "step": 3964 }, { "epoch": 0.007032045809363472, "grad_norm": 0.9375, "learning_rate": 0.00199567809592711, "loss": 0.3307, "step": 3966 }, { "epoch": 0.007035591974673287, "grad_norm": 0.474609375, "learning_rate": 0.0019956722680775324, "loss": 0.2712, "step": 3968 }, { "epoch": 0.007039138139983103, "grad_norm": 0.65625, "learning_rate": 0.0019956664363108053, "loss": 0.3092, "step": 3970 }, { "epoch": 0.007042684305292917, "grad_norm": 0.65625, "learning_rate": 0.001995660600626953, "loss": 0.2978, "step": 3972 }, { "epoch": 0.007046230470602733, "grad_norm": 0.306640625, "learning_rate": 0.0019956547610260017, "loss": 0.251, "step": 3974 }, { "epoch": 0.007049776635912548, "grad_norm": 3.265625, "learning_rate": 0.0019956489175079768, "loss": 0.5655, "step": 3976 }, { "epoch": 0.0070533228012223635, "grad_norm": 0.365234375, "learning_rate": 0.0019956430700729037, "loss": 0.2985, "step": 3978 }, { "epoch": 0.007056868966532178, "grad_norm": 0.8671875, "learning_rate": 0.001995637218720808, "loss": 0.4254, "step": 3980 }, { "epoch": 0.0070604151318419935, "grad_norm": 1.1328125, "learning_rate": 0.0019956313634517152, "loss": 0.3667, "step": 3982 }, { "epoch": 0.007063961297151809, "grad_norm": 1.078125, "learning_rate": 0.0019956255042656514, "loss": 0.3232, "step": 3984 }, { "epoch": 0.0070675074624616235, "grad_norm": 0.54296875, "learning_rate": 0.0019956196411626418, "loss": 0.2812, "step": 3986 }, { "epoch": 0.007071053627771439, "grad_norm": 0.208984375, "learning_rate": 0.001995613774142712, "loss": 0.2535, "step": 3988 }, { "epoch": 0.007074599793081254, "grad_norm": 0.474609375, "learning_rate": 0.0019956079032058876, "loss": 0.3384, "step": 3990 }, { "epoch": 0.00707814595839107, "grad_norm": 0.6640625, "learning_rate": 0.001995602028352195, "loss": 0.2275, "step": 3992 }, { "epoch": 0.007081692123700884, "grad_norm": 0.3828125, "learning_rate": 0.001995596149581659, "loss": 0.2794, "step": 3994 }, { "epoch": 0.0070852382890107, "grad_norm": 0.251953125, "learning_rate": 0.001995590266894306, "loss": 0.2846, "step": 3996 }, { "epoch": 0.007088784454320515, "grad_norm": 0.423828125, "learning_rate": 0.001995584380290161, "loss": 0.328, "step": 3998 }, { "epoch": 0.00709233061963033, "grad_norm": 0.3671875, "learning_rate": 0.0019955784897692504, "loss": 0.2343, "step": 4000 }, { "epoch": 0.007095876784940145, "grad_norm": 0.287109375, "learning_rate": 0.0019955725953315997, "loss": 0.2368, "step": 4002 }, { "epoch": 0.0070994229502499605, "grad_norm": 0.828125, "learning_rate": 0.001995566696977234, "loss": 0.5125, "step": 4004 }, { "epoch": 0.007102969115559775, "grad_norm": 0.353515625, "learning_rate": 0.0019955607947061806, "loss": 0.3207, "step": 4006 }, { "epoch": 0.0071065152808695905, "grad_norm": 0.25390625, "learning_rate": 0.001995554888518464, "loss": 0.2598, "step": 4008 }, { "epoch": 0.007110061446179406, "grad_norm": 0.97265625, "learning_rate": 0.0019955489784141108, "loss": 0.2748, "step": 4010 }, { "epoch": 0.007113607611489221, "grad_norm": 0.345703125, "learning_rate": 0.0019955430643931464, "loss": 0.251, "step": 4012 }, { "epoch": 0.007117153776799036, "grad_norm": 0.376953125, "learning_rate": 0.001995537146455597, "loss": 0.2903, "step": 4014 }, { "epoch": 0.007120699942108851, "grad_norm": 0.65625, "learning_rate": 0.001995531224601488, "loss": 0.3316, "step": 4016 }, { "epoch": 0.007124246107418667, "grad_norm": 0.328125, "learning_rate": 0.001995525298830846, "loss": 0.269, "step": 4018 }, { "epoch": 0.007127792272728481, "grad_norm": 0.478515625, "learning_rate": 0.0019955193691436964, "loss": 0.3467, "step": 4020 }, { "epoch": 0.007131338438038297, "grad_norm": 0.392578125, "learning_rate": 0.0019955134355400654, "loss": 0.2788, "step": 4022 }, { "epoch": 0.007134884603348112, "grad_norm": 0.427734375, "learning_rate": 0.0019955074980199782, "loss": 0.2938, "step": 4024 }, { "epoch": 0.0071384307686579275, "grad_norm": 0.47265625, "learning_rate": 0.001995501556583462, "loss": 0.3235, "step": 4026 }, { "epoch": 0.007141976933967742, "grad_norm": 0.328125, "learning_rate": 0.0019954956112305418, "loss": 0.2632, "step": 4028 }, { "epoch": 0.0071455230992775575, "grad_norm": 1.4765625, "learning_rate": 0.0019954896619612445, "loss": 0.4317, "step": 4030 }, { "epoch": 0.007149069264587373, "grad_norm": 0.310546875, "learning_rate": 0.0019954837087755952, "loss": 0.3593, "step": 4032 }, { "epoch": 0.0071526154298971875, "grad_norm": 0.6484375, "learning_rate": 0.0019954777516736203, "loss": 0.4007, "step": 4034 }, { "epoch": 0.007156161595207003, "grad_norm": 0.271484375, "learning_rate": 0.001995471790655346, "loss": 0.2364, "step": 4036 }, { "epoch": 0.007159707760516818, "grad_norm": 1.2109375, "learning_rate": 0.0019954658257207982, "loss": 0.3287, "step": 4038 }, { "epoch": 0.007163253925826633, "grad_norm": 0.3125, "learning_rate": 0.0019954598568700027, "loss": 0.2472, "step": 4040 }, { "epoch": 0.007166800091136448, "grad_norm": 0.5078125, "learning_rate": 0.0019954538841029865, "loss": 0.2895, "step": 4042 }, { "epoch": 0.007170346256446264, "grad_norm": 0.318359375, "learning_rate": 0.0019954479074197743, "loss": 0.305, "step": 4044 }, { "epoch": 0.007173892421756079, "grad_norm": 0.3203125, "learning_rate": 0.001995441926820394, "loss": 0.2857, "step": 4046 }, { "epoch": 0.007177438587065894, "grad_norm": 0.65234375, "learning_rate": 0.0019954359423048702, "loss": 0.3389, "step": 4048 }, { "epoch": 0.007180984752375709, "grad_norm": 0.578125, "learning_rate": 0.00199542995387323, "loss": 0.3093, "step": 4050 }, { "epoch": 0.0071845309176855245, "grad_norm": 1.8046875, "learning_rate": 0.0019954239615254995, "loss": 0.3945, "step": 4052 }, { "epoch": 0.007188077082995339, "grad_norm": 0.419921875, "learning_rate": 0.0019954179652617045, "loss": 0.2524, "step": 4054 }, { "epoch": 0.0071916232483051545, "grad_norm": 0.291015625, "learning_rate": 0.0019954119650818715, "loss": 0.3658, "step": 4056 }, { "epoch": 0.00719516941361497, "grad_norm": 0.302734375, "learning_rate": 0.001995405960986027, "loss": 0.2964, "step": 4058 }, { "epoch": 0.007198715578924785, "grad_norm": 0.21484375, "learning_rate": 0.001995399952974196, "loss": 0.2584, "step": 4060 }, { "epoch": 0.0072022617442346, "grad_norm": 1.2421875, "learning_rate": 0.0019953939410464064, "loss": 0.4, "step": 4062 }, { "epoch": 0.007205807909544415, "grad_norm": 0.310546875, "learning_rate": 0.001995387925202684, "loss": 0.2817, "step": 4064 }, { "epoch": 0.007209354074854231, "grad_norm": 0.55859375, "learning_rate": 0.0019953819054430544, "loss": 0.3777, "step": 4066 }, { "epoch": 0.007212900240164045, "grad_norm": 0.3984375, "learning_rate": 0.0019953758817675446, "loss": 0.2462, "step": 4068 }, { "epoch": 0.007216446405473861, "grad_norm": 0.375, "learning_rate": 0.001995369854176181, "loss": 0.4524, "step": 4070 }, { "epoch": 0.007219992570783676, "grad_norm": 0.203125, "learning_rate": 0.00199536382266899, "loss": 0.2715, "step": 4072 }, { "epoch": 0.007223538736093491, "grad_norm": 0.453125, "learning_rate": 0.001995357787245997, "loss": 0.2618, "step": 4074 }, { "epoch": 0.007227084901403306, "grad_norm": 0.494140625, "learning_rate": 0.0019953517479072294, "loss": 0.4218, "step": 4076 }, { "epoch": 0.0072306310667131215, "grad_norm": 0.49609375, "learning_rate": 0.0019953457046527133, "loss": 0.2579, "step": 4078 }, { "epoch": 0.007234177232022937, "grad_norm": 0.294921875, "learning_rate": 0.001995339657482475, "loss": 0.2531, "step": 4080 }, { "epoch": 0.0072377233973327515, "grad_norm": 0.421875, "learning_rate": 0.0019953336063965417, "loss": 0.2742, "step": 4082 }, { "epoch": 0.007241269562642567, "grad_norm": 0.53125, "learning_rate": 0.001995327551394939, "loss": 0.2189, "step": 4084 }, { "epoch": 0.007244815727952382, "grad_norm": 0.984375, "learning_rate": 0.0019953214924776936, "loss": 0.3908, "step": 4086 }, { "epoch": 0.007248361893262197, "grad_norm": 0.263671875, "learning_rate": 0.001995315429644832, "loss": 0.2772, "step": 4088 }, { "epoch": 0.007251908058572012, "grad_norm": 1.203125, "learning_rate": 0.001995309362896381, "loss": 0.3453, "step": 4090 }, { "epoch": 0.007255454223881828, "grad_norm": 0.859375, "learning_rate": 0.0019953032922323667, "loss": 0.3992, "step": 4092 }, { "epoch": 0.007259000389191643, "grad_norm": 0.51953125, "learning_rate": 0.001995297217652816, "loss": 0.3004, "step": 4094 }, { "epoch": 0.007262546554501458, "grad_norm": 1.1015625, "learning_rate": 0.0019952911391577554, "loss": 0.5569, "step": 4096 }, { "epoch": 0.007266092719811273, "grad_norm": 0.388671875, "learning_rate": 0.001995285056747211, "loss": 0.3472, "step": 4098 }, { "epoch": 0.0072696388851210885, "grad_norm": 0.77734375, "learning_rate": 0.00199527897042121, "loss": 0.4015, "step": 4100 }, { "epoch": 0.007273185050430903, "grad_norm": 0.365234375, "learning_rate": 0.0019952728801797786, "loss": 0.264, "step": 4102 }, { "epoch": 0.0072767312157407185, "grad_norm": 0.5546875, "learning_rate": 0.001995266786022944, "loss": 0.38, "step": 4104 }, { "epoch": 0.007280277381050534, "grad_norm": 0.259765625, "learning_rate": 0.0019952606879507324, "loss": 0.2326, "step": 4106 }, { "epoch": 0.0072838235463603485, "grad_norm": 0.53515625, "learning_rate": 0.001995254585963171, "loss": 0.2613, "step": 4108 }, { "epoch": 0.007287369711670164, "grad_norm": 0.341796875, "learning_rate": 0.0019952484800602856, "loss": 0.2423, "step": 4110 }, { "epoch": 0.007290915876979979, "grad_norm": 1.046875, "learning_rate": 0.0019952423702421034, "loss": 0.2558, "step": 4112 }, { "epoch": 0.007294462042289795, "grad_norm": 0.30859375, "learning_rate": 0.001995236256508651, "loss": 0.2847, "step": 4114 }, { "epoch": 0.007298008207599609, "grad_norm": 0.470703125, "learning_rate": 0.0019952301388599554, "loss": 0.3737, "step": 4116 }, { "epoch": 0.007301554372909425, "grad_norm": 0.30859375, "learning_rate": 0.0019952240172960434, "loss": 0.2665, "step": 4118 }, { "epoch": 0.00730510053821924, "grad_norm": 0.67578125, "learning_rate": 0.001995217891816941, "loss": 0.2976, "step": 4120 }, { "epoch": 0.007308646703529055, "grad_norm": 0.796875, "learning_rate": 0.001995211762422676, "loss": 0.3633, "step": 4122 }, { "epoch": 0.00731219286883887, "grad_norm": 0.6015625, "learning_rate": 0.001995205629113275, "loss": 0.2471, "step": 4124 }, { "epoch": 0.0073157390341486855, "grad_norm": 0.82421875, "learning_rate": 0.001995199491888764, "loss": 0.3493, "step": 4126 }, { "epoch": 0.007319285199458501, "grad_norm": 0.55078125, "learning_rate": 0.0019951933507491703, "loss": 0.2472, "step": 4128 }, { "epoch": 0.0073228313647683155, "grad_norm": 0.34765625, "learning_rate": 0.001995187205694521, "loss": 0.2807, "step": 4130 }, { "epoch": 0.007326377530078131, "grad_norm": 0.703125, "learning_rate": 0.001995181056724843, "loss": 0.2845, "step": 4132 }, { "epoch": 0.007329923695387946, "grad_norm": 0.57421875, "learning_rate": 0.0019951749038401635, "loss": 0.4949, "step": 4134 }, { "epoch": 0.007333469860697761, "grad_norm": 2.03125, "learning_rate": 0.0019951687470405083, "loss": 0.2666, "step": 4136 }, { "epoch": 0.007337016026007576, "grad_norm": 1.2890625, "learning_rate": 0.0019951625863259053, "loss": 0.3614, "step": 4138 }, { "epoch": 0.007340562191317392, "grad_norm": 0.427734375, "learning_rate": 0.001995156421696381, "loss": 0.3208, "step": 4140 }, { "epoch": 0.007344108356627206, "grad_norm": 0.255859375, "learning_rate": 0.0019951502531519627, "loss": 0.2495, "step": 4142 }, { "epoch": 0.007347654521937022, "grad_norm": 0.58984375, "learning_rate": 0.001995144080692677, "loss": 0.3306, "step": 4144 }, { "epoch": 0.007351200687246837, "grad_norm": 0.4921875, "learning_rate": 0.0019951379043185507, "loss": 0.3835, "step": 4146 }, { "epoch": 0.0073547468525566526, "grad_norm": 0.439453125, "learning_rate": 0.0019951317240296117, "loss": 0.4441, "step": 4148 }, { "epoch": 0.007358293017866467, "grad_norm": 0.6328125, "learning_rate": 0.001995125539825886, "loss": 0.2542, "step": 4150 }, { "epoch": 0.0073618391831762825, "grad_norm": 0.369140625, "learning_rate": 0.0019951193517074015, "loss": 0.3727, "step": 4152 }, { "epoch": 0.007365385348486098, "grad_norm": 0.228515625, "learning_rate": 0.0019951131596741846, "loss": 0.2044, "step": 4154 }, { "epoch": 0.0073689315137959125, "grad_norm": 0.6171875, "learning_rate": 0.0019951069637262633, "loss": 0.2646, "step": 4156 }, { "epoch": 0.007372477679105728, "grad_norm": 0.4296875, "learning_rate": 0.0019951007638636634, "loss": 0.3063, "step": 4158 }, { "epoch": 0.007376023844415543, "grad_norm": 0.80078125, "learning_rate": 0.001995094560086413, "loss": 0.5668, "step": 4160 }, { "epoch": 0.007379570009725359, "grad_norm": 0.28515625, "learning_rate": 0.001995088352394539, "loss": 0.331, "step": 4162 }, { "epoch": 0.007383116175035173, "grad_norm": 0.578125, "learning_rate": 0.0019950821407880683, "loss": 0.2038, "step": 4164 }, { "epoch": 0.007386662340344989, "grad_norm": 0.859375, "learning_rate": 0.001995075925267028, "loss": 0.275, "step": 4166 }, { "epoch": 0.007390208505654804, "grad_norm": 0.39453125, "learning_rate": 0.001995069705831446, "loss": 0.291, "step": 4168 }, { "epoch": 0.007393754670964619, "grad_norm": 0.23046875, "learning_rate": 0.0019950634824813488, "loss": 0.2342, "step": 4170 }, { "epoch": 0.007397300836274434, "grad_norm": 0.3984375, "learning_rate": 0.0019950572552167637, "loss": 0.2628, "step": 4172 }, { "epoch": 0.0074008470015842496, "grad_norm": 0.337890625, "learning_rate": 0.0019950510240377187, "loss": 0.2843, "step": 4174 }, { "epoch": 0.007404393166894064, "grad_norm": 0.54296875, "learning_rate": 0.00199504478894424, "loss": 0.3176, "step": 4176 }, { "epoch": 0.0074079393322038795, "grad_norm": 0.333984375, "learning_rate": 0.0019950385499363553, "loss": 0.2461, "step": 4178 }, { "epoch": 0.007411485497513695, "grad_norm": 0.703125, "learning_rate": 0.0019950323070140915, "loss": 0.4238, "step": 4180 }, { "epoch": 0.00741503166282351, "grad_norm": 0.265625, "learning_rate": 0.0019950260601774767, "loss": 0.1986, "step": 4182 }, { "epoch": 0.007418577828133325, "grad_norm": 0.357421875, "learning_rate": 0.0019950198094265377, "loss": 0.2889, "step": 4184 }, { "epoch": 0.00742212399344314, "grad_norm": 3.609375, "learning_rate": 0.0019950135547613023, "loss": 0.3667, "step": 4186 }, { "epoch": 0.007425670158752956, "grad_norm": 0.4140625, "learning_rate": 0.001995007296181797, "loss": 0.2688, "step": 4188 }, { "epoch": 0.00742921632406277, "grad_norm": 0.2265625, "learning_rate": 0.00199500103368805, "loss": 0.2974, "step": 4190 }, { "epoch": 0.007432762489372586, "grad_norm": 0.408203125, "learning_rate": 0.0019949947672800884, "loss": 0.3078, "step": 4192 }, { "epoch": 0.007436308654682401, "grad_norm": 0.7421875, "learning_rate": 0.0019949884969579393, "loss": 0.2632, "step": 4194 }, { "epoch": 0.007439854819992217, "grad_norm": 0.83984375, "learning_rate": 0.0019949822227216304, "loss": 0.3044, "step": 4196 }, { "epoch": 0.007443400985302031, "grad_norm": 0.64453125, "learning_rate": 0.0019949759445711895, "loss": 0.2697, "step": 4198 }, { "epoch": 0.0074469471506118465, "grad_norm": 0.341796875, "learning_rate": 0.001994969662506643, "loss": 0.2577, "step": 4200 }, { "epoch": 0.007450493315921662, "grad_norm": 0.474609375, "learning_rate": 0.00199496337652802, "loss": 0.3347, "step": 4202 }, { "epoch": 0.0074540394812314765, "grad_norm": 0.35546875, "learning_rate": 0.0019949570866353464, "loss": 0.3018, "step": 4204 }, { "epoch": 0.007457585646541292, "grad_norm": 0.5546875, "learning_rate": 0.0019949507928286505, "loss": 0.2547, "step": 4206 }, { "epoch": 0.007461131811851107, "grad_norm": 0.91796875, "learning_rate": 0.001994944495107959, "loss": 0.4746, "step": 4208 }, { "epoch": 0.007464677977160922, "grad_norm": 0.546875, "learning_rate": 0.001994938193473301, "loss": 0.2632, "step": 4210 }, { "epoch": 0.007468224142470737, "grad_norm": 0.2294921875, "learning_rate": 0.001994931887924703, "loss": 0.2509, "step": 4212 }, { "epoch": 0.007471770307780553, "grad_norm": 0.30859375, "learning_rate": 0.001994925578462193, "loss": 0.2849, "step": 4214 }, { "epoch": 0.007475316473090368, "grad_norm": 0.263671875, "learning_rate": 0.001994919265085798, "loss": 0.2497, "step": 4216 }, { "epoch": 0.007478862638400183, "grad_norm": 0.345703125, "learning_rate": 0.0019949129477955458, "loss": 0.2916, "step": 4218 }, { "epoch": 0.007482408803709998, "grad_norm": 0.26953125, "learning_rate": 0.0019949066265914648, "loss": 0.2228, "step": 4220 }, { "epoch": 0.007485954969019814, "grad_norm": 0.28125, "learning_rate": 0.0019949003014735813, "loss": 0.2561, "step": 4222 }, { "epoch": 0.007489501134329628, "grad_norm": 0.490234375, "learning_rate": 0.001994893972441924, "loss": 0.3268, "step": 4224 }, { "epoch": 0.0074930472996394435, "grad_norm": 1.4609375, "learning_rate": 0.0019948876394965207, "loss": 0.5349, "step": 4226 }, { "epoch": 0.007496593464949259, "grad_norm": 0.38671875, "learning_rate": 0.0019948813026373978, "loss": 0.4591, "step": 4228 }, { "epoch": 0.007500139630259074, "grad_norm": 0.58984375, "learning_rate": 0.0019948749618645843, "loss": 0.2819, "step": 4230 }, { "epoch": 0.007503685795568889, "grad_norm": 0.396484375, "learning_rate": 0.001994868617178108, "loss": 0.2577, "step": 4232 }, { "epoch": 0.007507231960878704, "grad_norm": 1.75, "learning_rate": 0.001994862268577996, "loss": 0.2379, "step": 4234 }, { "epoch": 0.00751077812618852, "grad_norm": 0.4140625, "learning_rate": 0.001994855916064276, "loss": 0.325, "step": 4236 }, { "epoch": 0.007514324291498334, "grad_norm": 0.267578125, "learning_rate": 0.0019948495596369756, "loss": 0.2582, "step": 4238 }, { "epoch": 0.00751787045680815, "grad_norm": 0.259765625, "learning_rate": 0.001994843199296124, "loss": 0.3427, "step": 4240 }, { "epoch": 0.007521416622117965, "grad_norm": 0.6171875, "learning_rate": 0.0019948368350417474, "loss": 0.2594, "step": 4242 }, { "epoch": 0.00752496278742778, "grad_norm": 0.482421875, "learning_rate": 0.001994830466873874, "loss": 0.2235, "step": 4244 }, { "epoch": 0.007528508952737595, "grad_norm": 0.62109375, "learning_rate": 0.0019948240947925324, "loss": 0.3591, "step": 4246 }, { "epoch": 0.0075320551180474106, "grad_norm": 0.5546875, "learning_rate": 0.00199481771879775, "loss": 0.2935, "step": 4248 }, { "epoch": 0.007535601283357226, "grad_norm": 0.333984375, "learning_rate": 0.0019948113388895544, "loss": 0.249, "step": 4250 }, { "epoch": 0.0075391474486670405, "grad_norm": 0.7109375, "learning_rate": 0.0019948049550679737, "loss": 0.2767, "step": 4252 }, { "epoch": 0.007542693613976856, "grad_norm": 0.361328125, "learning_rate": 0.0019947985673330363, "loss": 0.3592, "step": 4254 }, { "epoch": 0.007546239779286671, "grad_norm": 0.46484375, "learning_rate": 0.0019947921756847697, "loss": 0.374, "step": 4256 }, { "epoch": 0.007549785944596486, "grad_norm": 0.291015625, "learning_rate": 0.0019947857801232015, "loss": 0.3037, "step": 4258 }, { "epoch": 0.007553332109906301, "grad_norm": 3.59375, "learning_rate": 0.0019947793806483604, "loss": 0.6666, "step": 4260 }, { "epoch": 0.007556878275216117, "grad_norm": 0.341796875, "learning_rate": 0.0019947729772602737, "loss": 0.221, "step": 4262 }, { "epoch": 0.007560424440525932, "grad_norm": 0.53515625, "learning_rate": 0.00199476656995897, "loss": 0.2994, "step": 4264 }, { "epoch": 0.007563970605835747, "grad_norm": 0.5859375, "learning_rate": 0.001994760158744477, "loss": 0.2465, "step": 4266 }, { "epoch": 0.007567516771145562, "grad_norm": 1.78125, "learning_rate": 0.001994753743616823, "loss": 0.2681, "step": 4268 }, { "epoch": 0.007571062936455378, "grad_norm": 0.484375, "learning_rate": 0.0019947473245760356, "loss": 0.4576, "step": 4270 }, { "epoch": 0.007574609101765192, "grad_norm": 0.5625, "learning_rate": 0.0019947409016221433, "loss": 0.2747, "step": 4272 }, { "epoch": 0.0075781552670750076, "grad_norm": 0.69921875, "learning_rate": 0.0019947344747551737, "loss": 0.4057, "step": 4274 }, { "epoch": 0.007581701432384823, "grad_norm": 1.6796875, "learning_rate": 0.001994728043975156, "loss": 0.3244, "step": 4276 }, { "epoch": 0.0075852475976946375, "grad_norm": 0.578125, "learning_rate": 0.0019947216092821166, "loss": 0.2432, "step": 4278 }, { "epoch": 0.007588793763004453, "grad_norm": 0.494140625, "learning_rate": 0.001994715170676085, "loss": 0.4124, "step": 4280 }, { "epoch": 0.007592339928314268, "grad_norm": 1.8203125, "learning_rate": 0.0019947087281570893, "loss": 0.3114, "step": 4282 }, { "epoch": 0.007595886093624084, "grad_norm": 0.263671875, "learning_rate": 0.001994702281725157, "loss": 0.266, "step": 4284 }, { "epoch": 0.007599432258933898, "grad_norm": 0.625, "learning_rate": 0.0019946958313803165, "loss": 0.2568, "step": 4286 }, { "epoch": 0.007602978424243714, "grad_norm": 0.6328125, "learning_rate": 0.001994689377122596, "loss": 0.3541, "step": 4288 }, { "epoch": 0.007606524589553529, "grad_norm": 0.3125, "learning_rate": 0.001994682918952024, "loss": 0.2765, "step": 4290 }, { "epoch": 0.007610070754863344, "grad_norm": 0.86328125, "learning_rate": 0.0019946764568686288, "loss": 0.3133, "step": 4292 }, { "epoch": 0.007613616920173159, "grad_norm": 0.416015625, "learning_rate": 0.0019946699908724385, "loss": 0.2714, "step": 4294 }, { "epoch": 0.007617163085482975, "grad_norm": 0.31640625, "learning_rate": 0.0019946635209634814, "loss": 0.2529, "step": 4296 }, { "epoch": 0.00762070925079279, "grad_norm": 0.60546875, "learning_rate": 0.0019946570471417856, "loss": 0.4352, "step": 4298 }, { "epoch": 0.0076242554161026046, "grad_norm": 0.28125, "learning_rate": 0.0019946505694073795, "loss": 0.2255, "step": 4300 }, { "epoch": 0.00762780158141242, "grad_norm": 0.58984375, "learning_rate": 0.0019946440877602915, "loss": 0.3296, "step": 4302 }, { "epoch": 0.007631347746722235, "grad_norm": 0.25, "learning_rate": 0.00199463760220055, "loss": 0.3566, "step": 4304 }, { "epoch": 0.00763489391203205, "grad_norm": 0.400390625, "learning_rate": 0.0019946311127281833, "loss": 0.2687, "step": 4306 }, { "epoch": 0.007638440077341865, "grad_norm": 0.21484375, "learning_rate": 0.0019946246193432195, "loss": 0.2209, "step": 4308 }, { "epoch": 0.007641986242651681, "grad_norm": 0.375, "learning_rate": 0.0019946181220456874, "loss": 0.2465, "step": 4310 }, { "epoch": 0.007645532407961495, "grad_norm": 0.73046875, "learning_rate": 0.0019946116208356154, "loss": 0.2641, "step": 4312 }, { "epoch": 0.007649078573271311, "grad_norm": 0.283203125, "learning_rate": 0.001994605115713032, "loss": 0.2838, "step": 4314 }, { "epoch": 0.007652624738581126, "grad_norm": 0.287109375, "learning_rate": 0.0019945986066779654, "loss": 0.3213, "step": 4316 }, { "epoch": 0.007656170903890942, "grad_norm": 0.27734375, "learning_rate": 0.001994592093730444, "loss": 0.2789, "step": 4318 }, { "epoch": 0.007659717069200756, "grad_norm": 1.0390625, "learning_rate": 0.0019945855768704964, "loss": 0.4544, "step": 4320 }, { "epoch": 0.007663263234510572, "grad_norm": 0.38671875, "learning_rate": 0.001994579056098151, "loss": 0.2123, "step": 4322 }, { "epoch": 0.007666809399820387, "grad_norm": 0.33984375, "learning_rate": 0.0019945725314134367, "loss": 0.253, "step": 4324 }, { "epoch": 0.0076703555651302015, "grad_norm": 0.2021484375, "learning_rate": 0.001994566002816382, "loss": 0.2218, "step": 4326 }, { "epoch": 0.007673901730440017, "grad_norm": 0.64453125, "learning_rate": 0.0019945594703070146, "loss": 0.2866, "step": 4328 }, { "epoch": 0.007677447895749832, "grad_norm": 0.80859375, "learning_rate": 0.001994552933885364, "loss": 0.2576, "step": 4330 }, { "epoch": 0.007680994061059648, "grad_norm": 1.015625, "learning_rate": 0.0019945463935514586, "loss": 0.2613, "step": 4332 }, { "epoch": 0.007684540226369462, "grad_norm": 0.453125, "learning_rate": 0.0019945398493053266, "loss": 0.4501, "step": 4334 }, { "epoch": 0.007688086391679278, "grad_norm": 0.98046875, "learning_rate": 0.0019945333011469973, "loss": 0.2772, "step": 4336 }, { "epoch": 0.007691632556989093, "grad_norm": 0.333984375, "learning_rate": 0.0019945267490764987, "loss": 0.247, "step": 4338 }, { "epoch": 0.007695178722298908, "grad_norm": 0.53125, "learning_rate": 0.00199452019309386, "loss": 0.2608, "step": 4340 }, { "epoch": 0.007698724887608723, "grad_norm": 4.09375, "learning_rate": 0.0019945136331991093, "loss": 0.364, "step": 4342 }, { "epoch": 0.007702271052918539, "grad_norm": 0.220703125, "learning_rate": 0.0019945070693922757, "loss": 0.2504, "step": 4344 }, { "epoch": 0.007705817218228353, "grad_norm": 0.3515625, "learning_rate": 0.0019945005016733875, "loss": 0.2802, "step": 4346 }, { "epoch": 0.0077093633835381686, "grad_norm": 0.37890625, "learning_rate": 0.001994493930042474, "loss": 0.2524, "step": 4348 }, { "epoch": 0.007712909548847984, "grad_norm": 0.7265625, "learning_rate": 0.001994487354499563, "loss": 0.2936, "step": 4350 }, { "epoch": 0.007716455714157799, "grad_norm": 1.2890625, "learning_rate": 0.0019944807750446845, "loss": 0.4244, "step": 4352 }, { "epoch": 0.007720001879467614, "grad_norm": 0.68359375, "learning_rate": 0.0019944741916778667, "loss": 0.3, "step": 4354 }, { "epoch": 0.007723548044777429, "grad_norm": 0.279296875, "learning_rate": 0.001994467604399138, "loss": 0.2836, "step": 4356 }, { "epoch": 0.007727094210087245, "grad_norm": 0.57421875, "learning_rate": 0.001994461013208528, "loss": 0.3151, "step": 4358 }, { "epoch": 0.007730640375397059, "grad_norm": 0.37109375, "learning_rate": 0.001994454418106065, "loss": 0.3651, "step": 4360 }, { "epoch": 0.007734186540706875, "grad_norm": 0.87109375, "learning_rate": 0.001994447819091778, "loss": 0.4105, "step": 4362 }, { "epoch": 0.00773773270601669, "grad_norm": 1.25, "learning_rate": 0.001994441216165695, "loss": 0.2935, "step": 4364 }, { "epoch": 0.007741278871326506, "grad_norm": 0.34765625, "learning_rate": 0.0019944346093278466, "loss": 0.2232, "step": 4366 }, { "epoch": 0.00774482503663632, "grad_norm": 0.69921875, "learning_rate": 0.0019944279985782605, "loss": 0.3407, "step": 4368 }, { "epoch": 0.007748371201946136, "grad_norm": 2.015625, "learning_rate": 0.0019944213839169656, "loss": 0.2508, "step": 4370 }, { "epoch": 0.007751917367255951, "grad_norm": 0.625, "learning_rate": 0.0019944147653439917, "loss": 0.3536, "step": 4372 }, { "epoch": 0.0077554635325657656, "grad_norm": 0.345703125, "learning_rate": 0.0019944081428593667, "loss": 0.4257, "step": 4374 }, { "epoch": 0.007759009697875581, "grad_norm": 0.294921875, "learning_rate": 0.00199440151646312, "loss": 0.2985, "step": 4376 }, { "epoch": 0.007762555863185396, "grad_norm": 0.337890625, "learning_rate": 0.0019943948861552807, "loss": 0.2368, "step": 4378 }, { "epoch": 0.007766102028495211, "grad_norm": 0.251953125, "learning_rate": 0.0019943882519358777, "loss": 0.2776, "step": 4380 }, { "epoch": 0.007769648193805026, "grad_norm": 0.3203125, "learning_rate": 0.00199438161380494, "loss": 0.2795, "step": 4382 }, { "epoch": 0.007773194359114842, "grad_norm": 0.330078125, "learning_rate": 0.0019943749717624966, "loss": 0.2514, "step": 4384 }, { "epoch": 0.007776740524424657, "grad_norm": 0.31640625, "learning_rate": 0.0019943683258085766, "loss": 0.2623, "step": 4386 }, { "epoch": 0.007780286689734472, "grad_norm": 0.5078125, "learning_rate": 0.001994361675943209, "loss": 0.5157, "step": 4388 }, { "epoch": 0.007783832855044287, "grad_norm": 0.5390625, "learning_rate": 0.001994355022166423, "loss": 0.374, "step": 4390 }, { "epoch": 0.007787379020354103, "grad_norm": 0.609375, "learning_rate": 0.001994348364478248, "loss": 0.3461, "step": 4392 }, { "epoch": 0.007790925185663917, "grad_norm": 0.318359375, "learning_rate": 0.001994341702878712, "loss": 0.2688, "step": 4394 }, { "epoch": 0.007794471350973733, "grad_norm": 2.609375, "learning_rate": 0.0019943350373678452, "loss": 0.4908, "step": 4396 }, { "epoch": 0.007798017516283548, "grad_norm": 0.1962890625, "learning_rate": 0.0019943283679456766, "loss": 0.2465, "step": 4398 }, { "epoch": 0.007801563681593363, "grad_norm": 0.58984375, "learning_rate": 0.001994321694612235, "loss": 0.2208, "step": 4400 }, { "epoch": 0.007805109846903178, "grad_norm": 2.109375, "learning_rate": 0.00199431501736755, "loss": 0.3789, "step": 4402 }, { "epoch": 0.007808656012212993, "grad_norm": 0.96484375, "learning_rate": 0.0019943083362116507, "loss": 0.2863, "step": 4404 }, { "epoch": 0.007812202177522809, "grad_norm": 0.31640625, "learning_rate": 0.001994301651144566, "loss": 0.2622, "step": 4406 }, { "epoch": 0.007815748342832624, "grad_norm": 0.255859375, "learning_rate": 0.0019942949621663255, "loss": 0.3736, "step": 4408 }, { "epoch": 0.007819294508142439, "grad_norm": 0.2890625, "learning_rate": 0.0019942882692769582, "loss": 0.2169, "step": 4410 }, { "epoch": 0.007822840673452253, "grad_norm": 0.8984375, "learning_rate": 0.0019942815724764934, "loss": 0.3357, "step": 4412 }, { "epoch": 0.00782638683876207, "grad_norm": 0.80078125, "learning_rate": 0.0019942748717649604, "loss": 0.2929, "step": 4414 }, { "epoch": 0.007829933004071884, "grad_norm": 0.271484375, "learning_rate": 0.001994268167142389, "loss": 0.2081, "step": 4416 }, { "epoch": 0.007833479169381699, "grad_norm": 0.306640625, "learning_rate": 0.001994261458608808, "loss": 0.3851, "step": 4418 }, { "epoch": 0.007837025334691515, "grad_norm": 0.267578125, "learning_rate": 0.0019942547461642463, "loss": 0.2196, "step": 4420 }, { "epoch": 0.00784057150000133, "grad_norm": 1.34375, "learning_rate": 0.001994248029808734, "loss": 0.3409, "step": 4422 }, { "epoch": 0.007844117665311144, "grad_norm": 0.9609375, "learning_rate": 0.0019942413095423005, "loss": 0.2777, "step": 4424 }, { "epoch": 0.00784766383062096, "grad_norm": 0.5234375, "learning_rate": 0.001994234585364975, "loss": 0.2788, "step": 4426 }, { "epoch": 0.007851209995930775, "grad_norm": 0.7890625, "learning_rate": 0.0019942278572767863, "loss": 0.2346, "step": 4428 }, { "epoch": 0.007854756161240591, "grad_norm": 0.796875, "learning_rate": 0.0019942211252777647, "loss": 0.5888, "step": 4430 }, { "epoch": 0.007858302326550406, "grad_norm": 0.37890625, "learning_rate": 0.0019942143893679396, "loss": 0.2735, "step": 4432 }, { "epoch": 0.00786184849186022, "grad_norm": 0.392578125, "learning_rate": 0.00199420764954734, "loss": 0.3055, "step": 4434 }, { "epoch": 0.007865394657170037, "grad_norm": 0.443359375, "learning_rate": 0.0019942009058159954, "loss": 0.272, "step": 4436 }, { "epoch": 0.007868940822479851, "grad_norm": 0.1787109375, "learning_rate": 0.0019941941581739357, "loss": 0.2479, "step": 4438 }, { "epoch": 0.007872486987789666, "grad_norm": 0.388671875, "learning_rate": 0.00199418740662119, "loss": 0.3031, "step": 4440 }, { "epoch": 0.007876033153099482, "grad_norm": 0.337890625, "learning_rate": 0.001994180651157788, "loss": 0.2757, "step": 4442 }, { "epoch": 0.007879579318409297, "grad_norm": 0.73046875, "learning_rate": 0.001994173891783759, "loss": 0.4637, "step": 4444 }, { "epoch": 0.007883125483719111, "grad_norm": 0.353515625, "learning_rate": 0.001994167128499133, "loss": 0.3172, "step": 4446 }, { "epoch": 0.007886671649028927, "grad_norm": 1.2890625, "learning_rate": 0.0019941603613039395, "loss": 0.2998, "step": 4448 }, { "epoch": 0.007890217814338742, "grad_norm": 0.2734375, "learning_rate": 0.001994153590198208, "loss": 0.2892, "step": 4450 }, { "epoch": 0.007893763979648557, "grad_norm": 0.6328125, "learning_rate": 0.001994146815181968, "loss": 0.3114, "step": 4452 }, { "epoch": 0.007897310144958373, "grad_norm": 0.3671875, "learning_rate": 0.0019941400362552494, "loss": 0.3865, "step": 4454 }, { "epoch": 0.007900856310268187, "grad_norm": 0.341796875, "learning_rate": 0.0019941332534180816, "loss": 0.2601, "step": 4456 }, { "epoch": 0.007904402475578002, "grad_norm": 0.302734375, "learning_rate": 0.0019941264666704936, "loss": 0.2924, "step": 4458 }, { "epoch": 0.007907948640887818, "grad_norm": 0.62890625, "learning_rate": 0.0019941196760125167, "loss": 0.2698, "step": 4460 }, { "epoch": 0.007911494806197633, "grad_norm": 0.388671875, "learning_rate": 0.001994112881444179, "loss": 0.3651, "step": 4462 }, { "epoch": 0.007915040971507449, "grad_norm": 0.36328125, "learning_rate": 0.0019941060829655115, "loss": 0.2912, "step": 4464 }, { "epoch": 0.007918587136817264, "grad_norm": 0.58203125, "learning_rate": 0.0019940992805765434, "loss": 0.2526, "step": 4466 }, { "epoch": 0.007922133302127078, "grad_norm": 0.2158203125, "learning_rate": 0.001994092474277304, "loss": 0.2764, "step": 4468 }, { "epoch": 0.007925679467436894, "grad_norm": 1.125, "learning_rate": 0.0019940856640678233, "loss": 0.2922, "step": 4470 }, { "epoch": 0.007929225632746709, "grad_norm": 0.263671875, "learning_rate": 0.001994078849948132, "loss": 0.2557, "step": 4472 }, { "epoch": 0.007932771798056524, "grad_norm": 0.61328125, "learning_rate": 0.0019940720319182583, "loss": 0.2532, "step": 4474 }, { "epoch": 0.00793631796336634, "grad_norm": 1.109375, "learning_rate": 0.0019940652099782333, "loss": 0.3192, "step": 4476 }, { "epoch": 0.007939864128676154, "grad_norm": 0.32421875, "learning_rate": 0.0019940583841280865, "loss": 0.2245, "step": 4478 }, { "epoch": 0.007943410293985969, "grad_norm": 0.494140625, "learning_rate": 0.001994051554367848, "loss": 0.3043, "step": 4480 }, { "epoch": 0.007946956459295785, "grad_norm": 1.046875, "learning_rate": 0.0019940447206975467, "loss": 0.2861, "step": 4482 }, { "epoch": 0.0079505026246056, "grad_norm": 0.416015625, "learning_rate": 0.001994037883117213, "loss": 0.2432, "step": 4484 }, { "epoch": 0.007954048789915414, "grad_norm": 0.337890625, "learning_rate": 0.001994031041626877, "loss": 0.2839, "step": 4486 }, { "epoch": 0.00795759495522523, "grad_norm": 0.73828125, "learning_rate": 0.001994024196226569, "loss": 0.3339, "step": 4488 }, { "epoch": 0.007961141120535045, "grad_norm": 0.435546875, "learning_rate": 0.0019940173469163184, "loss": 0.2935, "step": 4490 }, { "epoch": 0.00796468728584486, "grad_norm": 0.37109375, "learning_rate": 0.001994010493696155, "loss": 0.2689, "step": 4492 }, { "epoch": 0.007968233451154676, "grad_norm": 1.1796875, "learning_rate": 0.001994003636566109, "loss": 0.614, "step": 4494 }, { "epoch": 0.00797177961646449, "grad_norm": 0.53125, "learning_rate": 0.0019939967755262106, "loss": 0.4595, "step": 4496 }, { "epoch": 0.007975325781774307, "grad_norm": 0.44921875, "learning_rate": 0.001993989910576489, "loss": 0.3048, "step": 4498 }, { "epoch": 0.007978871947084121, "grad_norm": 0.484375, "learning_rate": 0.0019939830417169757, "loss": 0.3015, "step": 4500 }, { "epoch": 0.007982418112393936, "grad_norm": 0.9921875, "learning_rate": 0.0019939761689476993, "loss": 0.3926, "step": 4502 }, { "epoch": 0.007985964277703752, "grad_norm": 0.30859375, "learning_rate": 0.0019939692922686905, "loss": 0.2454, "step": 4504 }, { "epoch": 0.007989510443013567, "grad_norm": 2.28125, "learning_rate": 0.0019939624116799793, "loss": 0.2455, "step": 4506 }, { "epoch": 0.007993056608323381, "grad_norm": 0.306640625, "learning_rate": 0.001993955527181596, "loss": 0.2571, "step": 4508 }, { "epoch": 0.007996602773633198, "grad_norm": 0.8125, "learning_rate": 0.0019939486387735702, "loss": 0.4529, "step": 4510 }, { "epoch": 0.008000148938943012, "grad_norm": 0.265625, "learning_rate": 0.0019939417464559326, "loss": 0.2354, "step": 4512 }, { "epoch": 0.008003695104252827, "grad_norm": 0.3984375, "learning_rate": 0.001993934850228713, "loss": 0.2899, "step": 4514 }, { "epoch": 0.008007241269562643, "grad_norm": 0.314453125, "learning_rate": 0.0019939279500919417, "loss": 0.2149, "step": 4516 }, { "epoch": 0.008010787434872458, "grad_norm": 0.2734375, "learning_rate": 0.0019939210460456487, "loss": 0.2415, "step": 4518 }, { "epoch": 0.008014333600182272, "grad_norm": 0.29296875, "learning_rate": 0.001993914138089864, "loss": 0.3355, "step": 4520 }, { "epoch": 0.008017879765492088, "grad_norm": 1.1328125, "learning_rate": 0.001993907226224618, "loss": 0.3126, "step": 4522 }, { "epoch": 0.008021425930801903, "grad_norm": 0.69921875, "learning_rate": 0.0019939003104499416, "loss": 0.2445, "step": 4524 }, { "epoch": 0.008024972096111718, "grad_norm": 0.4375, "learning_rate": 0.0019938933907658646, "loss": 0.2574, "step": 4526 }, { "epoch": 0.008028518261421534, "grad_norm": 1.421875, "learning_rate": 0.0019938864671724165, "loss": 0.324, "step": 4528 }, { "epoch": 0.008032064426731348, "grad_norm": 0.38671875, "learning_rate": 0.001993879539669629, "loss": 0.2308, "step": 4530 }, { "epoch": 0.008035610592041165, "grad_norm": 2.21875, "learning_rate": 0.001993872608257531, "loss": 0.2725, "step": 4532 }, { "epoch": 0.00803915675735098, "grad_norm": 0.96484375, "learning_rate": 0.0019938656729361535, "loss": 0.325, "step": 4534 }, { "epoch": 0.008042702922660794, "grad_norm": 0.18359375, "learning_rate": 0.001993858733705527, "loss": 0.3296, "step": 4536 }, { "epoch": 0.00804624908797061, "grad_norm": 0.349609375, "learning_rate": 0.0019938517905656815, "loss": 0.3021, "step": 4538 }, { "epoch": 0.008049795253280425, "grad_norm": 0.35546875, "learning_rate": 0.0019938448435166474, "loss": 0.3478, "step": 4540 }, { "epoch": 0.00805334141859024, "grad_norm": 0.212890625, "learning_rate": 0.001993837892558455, "loss": 0.3006, "step": 4542 }, { "epoch": 0.008056887583900055, "grad_norm": 0.5703125, "learning_rate": 0.001993830937691135, "loss": 0.2103, "step": 4544 }, { "epoch": 0.00806043374920987, "grad_norm": 0.671875, "learning_rate": 0.001993823978914718, "loss": 0.2476, "step": 4546 }, { "epoch": 0.008063979914519685, "grad_norm": 0.4296875, "learning_rate": 0.001993817016229234, "loss": 0.2564, "step": 4548 }, { "epoch": 0.0080675260798295, "grad_norm": 0.396484375, "learning_rate": 0.001993810049634713, "loss": 0.5138, "step": 4550 }, { "epoch": 0.008071072245139315, "grad_norm": 0.359375, "learning_rate": 0.0019938030791311866, "loss": 0.2639, "step": 4552 }, { "epoch": 0.00807461841044913, "grad_norm": 0.490234375, "learning_rate": 0.0019937961047186846, "loss": 0.3402, "step": 4554 }, { "epoch": 0.008078164575758946, "grad_norm": 0.51171875, "learning_rate": 0.0019937891263972374, "loss": 0.3265, "step": 4556 }, { "epoch": 0.00808171074106876, "grad_norm": 1.140625, "learning_rate": 0.0019937821441668763, "loss": 0.3948, "step": 4558 }, { "epoch": 0.008085256906378575, "grad_norm": 0.34765625, "learning_rate": 0.001993775158027631, "loss": 0.2407, "step": 4560 }, { "epoch": 0.008088803071688392, "grad_norm": 0.55859375, "learning_rate": 0.0019937681679795317, "loss": 0.3222, "step": 4562 }, { "epoch": 0.008092349236998206, "grad_norm": 0.2451171875, "learning_rate": 0.0019937611740226103, "loss": 0.2497, "step": 4564 }, { "epoch": 0.008095895402308022, "grad_norm": 0.46875, "learning_rate": 0.0019937541761568963, "loss": 0.2194, "step": 4566 }, { "epoch": 0.008099441567617837, "grad_norm": 0.62109375, "learning_rate": 0.001993747174382421, "loss": 0.3039, "step": 4568 }, { "epoch": 0.008102987732927652, "grad_norm": 0.1806640625, "learning_rate": 0.0019937401686992147, "loss": 0.2621, "step": 4570 }, { "epoch": 0.008106533898237468, "grad_norm": 0.470703125, "learning_rate": 0.0019937331591073078, "loss": 0.2045, "step": 4572 }, { "epoch": 0.008110080063547282, "grad_norm": 0.30078125, "learning_rate": 0.0019937261456067315, "loss": 0.294, "step": 4574 }, { "epoch": 0.008113626228857097, "grad_norm": 0.2734375, "learning_rate": 0.001993719128197516, "loss": 0.2457, "step": 4576 }, { "epoch": 0.008117172394166913, "grad_norm": 0.1962890625, "learning_rate": 0.0019937121068796925, "loss": 0.2523, "step": 4578 }, { "epoch": 0.008120718559476728, "grad_norm": 0.34375, "learning_rate": 0.001993705081653291, "loss": 0.2384, "step": 4580 }, { "epoch": 0.008124264724786542, "grad_norm": 0.208984375, "learning_rate": 0.0019936980525183425, "loss": 0.3318, "step": 4582 }, { "epoch": 0.008127810890096359, "grad_norm": 0.73828125, "learning_rate": 0.001993691019474878, "loss": 0.3099, "step": 4584 }, { "epoch": 0.008131357055406173, "grad_norm": 0.2041015625, "learning_rate": 0.001993683982522928, "loss": 0.3007, "step": 4586 }, { "epoch": 0.008134903220715988, "grad_norm": 0.373046875, "learning_rate": 0.0019936769416625238, "loss": 0.2842, "step": 4588 }, { "epoch": 0.008138449386025804, "grad_norm": 0.388671875, "learning_rate": 0.001993669896893695, "loss": 0.2214, "step": 4590 }, { "epoch": 0.008141995551335619, "grad_norm": 0.50390625, "learning_rate": 0.001993662848216474, "loss": 0.2985, "step": 4592 }, { "epoch": 0.008145541716645433, "grad_norm": 2.015625, "learning_rate": 0.0019936557956308906, "loss": 0.404, "step": 4594 }, { "epoch": 0.00814908788195525, "grad_norm": 2.171875, "learning_rate": 0.0019936487391369754, "loss": 0.3872, "step": 4596 }, { "epoch": 0.008152634047265064, "grad_norm": 2.078125, "learning_rate": 0.00199364167873476, "loss": 0.501, "step": 4598 }, { "epoch": 0.00815618021257488, "grad_norm": 0.55859375, "learning_rate": 0.001993634614424275, "loss": 0.2852, "step": 4600 }, { "epoch": 0.008159726377884695, "grad_norm": 0.63671875, "learning_rate": 0.0019936275462055513, "loss": 0.2374, "step": 4602 }, { "epoch": 0.00816327254319451, "grad_norm": 0.458984375, "learning_rate": 0.0019936204740786194, "loss": 0.2571, "step": 4604 }, { "epoch": 0.008166818708504326, "grad_norm": 0.8515625, "learning_rate": 0.0019936133980435113, "loss": 0.2724, "step": 4606 }, { "epoch": 0.00817036487381414, "grad_norm": 0.439453125, "learning_rate": 0.0019936063181002564, "loss": 0.2986, "step": 4608 }, { "epoch": 0.008173911039123955, "grad_norm": 0.99609375, "learning_rate": 0.0019935992342488876, "loss": 0.2031, "step": 4610 }, { "epoch": 0.008177457204433771, "grad_norm": 0.4296875, "learning_rate": 0.001993592146489434, "loss": 0.2599, "step": 4612 }, { "epoch": 0.008181003369743586, "grad_norm": 0.345703125, "learning_rate": 0.001993585054821928, "loss": 0.2936, "step": 4614 }, { "epoch": 0.0081845495350534, "grad_norm": 0.60546875, "learning_rate": 0.0019935779592463996, "loss": 0.2948, "step": 4616 }, { "epoch": 0.008188095700363216, "grad_norm": 0.2333984375, "learning_rate": 0.0019935708597628803, "loss": 0.329, "step": 4618 }, { "epoch": 0.008191641865673031, "grad_norm": 0.5625, "learning_rate": 0.001993563756371401, "loss": 0.259, "step": 4620 }, { "epoch": 0.008195188030982846, "grad_norm": 0.3671875, "learning_rate": 0.0019935566490719933, "loss": 0.2434, "step": 4622 }, { "epoch": 0.008198734196292662, "grad_norm": 0.306640625, "learning_rate": 0.0019935495378646875, "loss": 0.2276, "step": 4624 }, { "epoch": 0.008202280361602476, "grad_norm": 0.490234375, "learning_rate": 0.001993542422749515, "loss": 0.2696, "step": 4626 }, { "epoch": 0.008205826526912291, "grad_norm": 0.35546875, "learning_rate": 0.0019935353037265073, "loss": 0.2543, "step": 4628 }, { "epoch": 0.008209372692222107, "grad_norm": 1.171875, "learning_rate": 0.001993528180795695, "loss": 0.328, "step": 4630 }, { "epoch": 0.008212918857531922, "grad_norm": 1.296875, "learning_rate": 0.0019935210539571094, "loss": 0.2815, "step": 4632 }, { "epoch": 0.008216465022841738, "grad_norm": 0.369140625, "learning_rate": 0.0019935139232107823, "loss": 0.2748, "step": 4634 }, { "epoch": 0.008220011188151553, "grad_norm": 0.2392578125, "learning_rate": 0.0019935067885567437, "loss": 0.2366, "step": 4636 }, { "epoch": 0.008223557353461367, "grad_norm": 0.50390625, "learning_rate": 0.0019934996499950254, "loss": 0.2969, "step": 4638 }, { "epoch": 0.008227103518771183, "grad_norm": 0.447265625, "learning_rate": 0.001993492507525659, "loss": 0.1919, "step": 4640 }, { "epoch": 0.008230649684080998, "grad_norm": 0.494140625, "learning_rate": 0.0019934853611486753, "loss": 0.4802, "step": 4642 }, { "epoch": 0.008234195849390813, "grad_norm": 0.4921875, "learning_rate": 0.001993478210864105, "loss": 0.28, "step": 4644 }, { "epoch": 0.008237742014700629, "grad_norm": 0.62109375, "learning_rate": 0.0019934710566719806, "loss": 0.3622, "step": 4646 }, { "epoch": 0.008241288180010443, "grad_norm": 0.37109375, "learning_rate": 0.001993463898572333, "loss": 0.2593, "step": 4648 }, { "epoch": 0.008244834345320258, "grad_norm": 0.60546875, "learning_rate": 0.0019934567365651927, "loss": 0.2329, "step": 4650 }, { "epoch": 0.008248380510630074, "grad_norm": 0.38671875, "learning_rate": 0.001993449570650592, "loss": 0.2304, "step": 4652 }, { "epoch": 0.008251926675939889, "grad_norm": 1.0703125, "learning_rate": 0.001993442400828562, "loss": 0.3452, "step": 4654 }, { "epoch": 0.008255472841249703, "grad_norm": 0.515625, "learning_rate": 0.0019934352270991333, "loss": 0.2684, "step": 4656 }, { "epoch": 0.00825901900655952, "grad_norm": 0.30078125, "learning_rate": 0.0019934280494623385, "loss": 0.2466, "step": 4658 }, { "epoch": 0.008262565171869334, "grad_norm": 1.2421875, "learning_rate": 0.001993420867918208, "loss": 0.3082, "step": 4660 }, { "epoch": 0.008266111337179149, "grad_norm": 0.26171875, "learning_rate": 0.0019934136824667735, "loss": 0.2719, "step": 4662 }, { "epoch": 0.008269657502488965, "grad_norm": 0.408203125, "learning_rate": 0.001993406493108067, "loss": 0.2737, "step": 4664 }, { "epoch": 0.00827320366779878, "grad_norm": 0.84765625, "learning_rate": 0.001993399299842119, "loss": 0.2495, "step": 4666 }, { "epoch": 0.008276749833108596, "grad_norm": 0.9453125, "learning_rate": 0.0019933921026689615, "loss": 0.4277, "step": 4668 }, { "epoch": 0.00828029599841841, "grad_norm": 0.302734375, "learning_rate": 0.001993384901588626, "loss": 0.2412, "step": 4670 }, { "epoch": 0.008283842163728225, "grad_norm": 0.37890625, "learning_rate": 0.001993377696601144, "loss": 0.2906, "step": 4672 }, { "epoch": 0.008287388329038041, "grad_norm": 0.82421875, "learning_rate": 0.001993370487706547, "loss": 0.2661, "step": 4674 }, { "epoch": 0.008290934494347856, "grad_norm": 0.5625, "learning_rate": 0.001993363274904866, "loss": 0.2185, "step": 4676 }, { "epoch": 0.00829448065965767, "grad_norm": 0.53515625, "learning_rate": 0.001993356058196133, "loss": 0.3298, "step": 4678 }, { "epoch": 0.008298026824967487, "grad_norm": 0.578125, "learning_rate": 0.00199334883758038, "loss": 0.2402, "step": 4680 }, { "epoch": 0.008301572990277301, "grad_norm": 0.953125, "learning_rate": 0.0019933416130576372, "loss": 0.3477, "step": 4682 }, { "epoch": 0.008305119155587116, "grad_norm": 0.8125, "learning_rate": 0.001993334384627938, "loss": 0.3677, "step": 4684 }, { "epoch": 0.008308665320896932, "grad_norm": 0.384765625, "learning_rate": 0.0019933271522913124, "loss": 0.2586, "step": 4686 }, { "epoch": 0.008312211486206747, "grad_norm": 1.8125, "learning_rate": 0.0019933199160477935, "loss": 0.4084, "step": 4688 }, { "epoch": 0.008315757651516561, "grad_norm": 0.4609375, "learning_rate": 0.001993312675897412, "loss": 0.336, "step": 4690 }, { "epoch": 0.008319303816826377, "grad_norm": 2.234375, "learning_rate": 0.0019933054318401994, "loss": 0.3933, "step": 4692 }, { "epoch": 0.008322849982136192, "grad_norm": 0.326171875, "learning_rate": 0.001993298183876188, "loss": 0.2626, "step": 4694 }, { "epoch": 0.008326396147446007, "grad_norm": 1.0078125, "learning_rate": 0.001993290932005409, "loss": 0.3769, "step": 4696 }, { "epoch": 0.008329942312755823, "grad_norm": 0.9140625, "learning_rate": 0.0019932836762278946, "loss": 0.5396, "step": 4698 }, { "epoch": 0.008333488478065637, "grad_norm": 0.490234375, "learning_rate": 0.001993276416543676, "loss": 0.3614, "step": 4700 }, { "epoch": 0.008337034643375454, "grad_norm": 0.58984375, "learning_rate": 0.001993269152952785, "loss": 0.3298, "step": 4702 }, { "epoch": 0.008340580808685268, "grad_norm": 0.5859375, "learning_rate": 0.0019932618854552543, "loss": 0.2188, "step": 4704 }, { "epoch": 0.008344126973995083, "grad_norm": 0.53515625, "learning_rate": 0.0019932546140511145, "loss": 0.3649, "step": 4706 }, { "epoch": 0.008347673139304899, "grad_norm": 0.466796875, "learning_rate": 0.0019932473387403982, "loss": 0.3457, "step": 4708 }, { "epoch": 0.008351219304614714, "grad_norm": 0.318359375, "learning_rate": 0.0019932400595231367, "loss": 0.2567, "step": 4710 }, { "epoch": 0.008354765469924528, "grad_norm": 0.2373046875, "learning_rate": 0.001993232776399362, "loss": 0.2158, "step": 4712 }, { "epoch": 0.008358311635234345, "grad_norm": 0.80859375, "learning_rate": 0.001993225489369106, "loss": 0.2842, "step": 4714 }, { "epoch": 0.008361857800544159, "grad_norm": 0.3984375, "learning_rate": 0.001993218198432401, "loss": 0.2714, "step": 4716 }, { "epoch": 0.008365403965853974, "grad_norm": 0.427734375, "learning_rate": 0.0019932109035892777, "loss": 0.3672, "step": 4718 }, { "epoch": 0.00836895013116379, "grad_norm": 0.353515625, "learning_rate": 0.0019932036048397692, "loss": 0.2279, "step": 4720 }, { "epoch": 0.008372496296473604, "grad_norm": 0.28515625, "learning_rate": 0.001993196302183907, "loss": 0.3203, "step": 4722 }, { "epoch": 0.008376042461783419, "grad_norm": 0.81640625, "learning_rate": 0.001993188995621723, "loss": 0.3099, "step": 4724 }, { "epoch": 0.008379588627093235, "grad_norm": 0.5546875, "learning_rate": 0.001993181685153249, "loss": 0.3665, "step": 4726 }, { "epoch": 0.00838313479240305, "grad_norm": 0.8203125, "learning_rate": 0.001993174370778517, "loss": 0.2998, "step": 4728 }, { "epoch": 0.008386680957712864, "grad_norm": 1.4453125, "learning_rate": 0.0019931670524975594, "loss": 0.2354, "step": 4730 }, { "epoch": 0.00839022712302268, "grad_norm": 1.3046875, "learning_rate": 0.0019931597303104076, "loss": 0.4875, "step": 4732 }, { "epoch": 0.008393773288332495, "grad_norm": 0.412109375, "learning_rate": 0.0019931524042170945, "loss": 0.3313, "step": 4734 }, { "epoch": 0.008397319453642312, "grad_norm": 1.0078125, "learning_rate": 0.001993145074217651, "loss": 0.4375, "step": 4736 }, { "epoch": 0.008400865618952126, "grad_norm": 0.328125, "learning_rate": 0.00199313774031211, "loss": 0.2734, "step": 4738 }, { "epoch": 0.00840441178426194, "grad_norm": 0.640625, "learning_rate": 0.001993130402500503, "loss": 0.3512, "step": 4740 }, { "epoch": 0.008407957949571757, "grad_norm": 0.234375, "learning_rate": 0.001993123060782863, "loss": 0.3138, "step": 4742 }, { "epoch": 0.008411504114881571, "grad_norm": 0.294921875, "learning_rate": 0.0019931157151592215, "loss": 0.2789, "step": 4744 }, { "epoch": 0.008415050280191386, "grad_norm": 0.50390625, "learning_rate": 0.0019931083656296103, "loss": 0.2437, "step": 4746 }, { "epoch": 0.008418596445501202, "grad_norm": 2.90625, "learning_rate": 0.001993101012194062, "loss": 0.4977, "step": 4748 }, { "epoch": 0.008422142610811017, "grad_norm": 0.490234375, "learning_rate": 0.0019930936548526084, "loss": 0.315, "step": 4750 }, { "epoch": 0.008425688776120831, "grad_norm": 0.60546875, "learning_rate": 0.0019930862936052827, "loss": 0.2915, "step": 4752 }, { "epoch": 0.008429234941430648, "grad_norm": 0.984375, "learning_rate": 0.0019930789284521152, "loss": 0.2825, "step": 4754 }, { "epoch": 0.008432781106740462, "grad_norm": 0.326171875, "learning_rate": 0.0019930715593931402, "loss": 0.2295, "step": 4756 }, { "epoch": 0.008436327272050277, "grad_norm": 0.478515625, "learning_rate": 0.0019930641864283885, "loss": 0.3066, "step": 4758 }, { "epoch": 0.008439873437360093, "grad_norm": 4.25, "learning_rate": 0.001993056809557893, "loss": 0.6235, "step": 4760 }, { "epoch": 0.008443419602669908, "grad_norm": 0.2255859375, "learning_rate": 0.0019930494287816853, "loss": 0.1938, "step": 4762 }, { "epoch": 0.008446965767979722, "grad_norm": 0.390625, "learning_rate": 0.001993042044099799, "loss": 0.2948, "step": 4764 }, { "epoch": 0.008450511933289538, "grad_norm": 0.59375, "learning_rate": 0.0019930346555122646, "loss": 0.2259, "step": 4766 }, { "epoch": 0.008454058098599353, "grad_norm": 0.44921875, "learning_rate": 0.0019930272630191157, "loss": 0.2892, "step": 4768 }, { "epoch": 0.00845760426390917, "grad_norm": 0.84375, "learning_rate": 0.0019930198666203843, "loss": 0.3746, "step": 4770 }, { "epoch": 0.008461150429218984, "grad_norm": 0.498046875, "learning_rate": 0.0019930124663161027, "loss": 0.2798, "step": 4772 }, { "epoch": 0.008464696594528798, "grad_norm": 3.15625, "learning_rate": 0.0019930050621063036, "loss": 0.358, "step": 4774 }, { "epoch": 0.008468242759838615, "grad_norm": 0.333984375, "learning_rate": 0.0019929976539910187, "loss": 0.235, "step": 4776 }, { "epoch": 0.00847178892514843, "grad_norm": 0.26953125, "learning_rate": 0.0019929902419702807, "loss": 0.223, "step": 4778 }, { "epoch": 0.008475335090458244, "grad_norm": 0.53515625, "learning_rate": 0.0019929828260441223, "loss": 0.3083, "step": 4780 }, { "epoch": 0.00847888125576806, "grad_norm": 0.38671875, "learning_rate": 0.001992975406212576, "loss": 0.1993, "step": 4782 }, { "epoch": 0.008482427421077875, "grad_norm": 0.462890625, "learning_rate": 0.0019929679824756733, "loss": 0.2866, "step": 4784 }, { "epoch": 0.00848597358638769, "grad_norm": 2.875, "learning_rate": 0.001992960554833448, "loss": 0.3772, "step": 4786 }, { "epoch": 0.008489519751697506, "grad_norm": 0.890625, "learning_rate": 0.001992953123285932, "loss": 0.391, "step": 4788 }, { "epoch": 0.00849306591700732, "grad_norm": 0.2412109375, "learning_rate": 0.001992945687833157, "loss": 0.4167, "step": 4790 }, { "epoch": 0.008496612082317135, "grad_norm": 0.484375, "learning_rate": 0.001992938248475157, "loss": 0.4974, "step": 4792 }, { "epoch": 0.008500158247626951, "grad_norm": 1.2578125, "learning_rate": 0.001992930805211963, "loss": 0.3321, "step": 4794 }, { "epoch": 0.008503704412936765, "grad_norm": 0.3984375, "learning_rate": 0.0019929233580436093, "loss": 0.2286, "step": 4796 }, { "epoch": 0.00850725057824658, "grad_norm": 0.515625, "learning_rate": 0.0019929159069701267, "loss": 0.3003, "step": 4798 }, { "epoch": 0.008510796743556396, "grad_norm": 0.388671875, "learning_rate": 0.001992908451991549, "loss": 0.2634, "step": 4800 }, { "epoch": 0.00851434290886621, "grad_norm": 1.0390625, "learning_rate": 0.001992900993107908, "loss": 0.3445, "step": 4802 }, { "epoch": 0.008517889074176027, "grad_norm": 1.1640625, "learning_rate": 0.0019928935303192372, "loss": 0.2538, "step": 4804 }, { "epoch": 0.008521435239485842, "grad_norm": 1.078125, "learning_rate": 0.0019928860636255685, "loss": 0.2472, "step": 4806 }, { "epoch": 0.008524981404795656, "grad_norm": 0.486328125, "learning_rate": 0.001992878593026935, "loss": 0.2252, "step": 4808 }, { "epoch": 0.008528527570105473, "grad_norm": 0.46484375, "learning_rate": 0.001992871118523369, "loss": 0.2489, "step": 4810 }, { "epoch": 0.008532073735415287, "grad_norm": 0.40625, "learning_rate": 0.001992863640114903, "loss": 0.3192, "step": 4812 }, { "epoch": 0.008535619900725102, "grad_norm": 0.357421875, "learning_rate": 0.0019928561578015707, "loss": 0.2293, "step": 4814 }, { "epoch": 0.008539166066034918, "grad_norm": 0.32421875, "learning_rate": 0.001992848671583404, "loss": 0.2109, "step": 4816 }, { "epoch": 0.008542712231344732, "grad_norm": 0.52734375, "learning_rate": 0.0019928411814604356, "loss": 0.2383, "step": 4818 }, { "epoch": 0.008546258396654547, "grad_norm": 0.416015625, "learning_rate": 0.0019928336874326987, "loss": 0.2808, "step": 4820 }, { "epoch": 0.008549804561964363, "grad_norm": 2.15625, "learning_rate": 0.0019928261895002263, "loss": 0.3658, "step": 4822 }, { "epoch": 0.008553350727274178, "grad_norm": 0.796875, "learning_rate": 0.00199281868766305, "loss": 0.3208, "step": 4824 }, { "epoch": 0.008556896892583992, "grad_norm": 0.7734375, "learning_rate": 0.001992811181921204, "loss": 0.3594, "step": 4826 }, { "epoch": 0.008560443057893809, "grad_norm": 0.5078125, "learning_rate": 0.00199280367227472, "loss": 0.2765, "step": 4828 }, { "epoch": 0.008563989223203623, "grad_norm": 0.62109375, "learning_rate": 0.0019927961587236313, "loss": 0.2422, "step": 4830 }, { "epoch": 0.008567535388513438, "grad_norm": 1.7734375, "learning_rate": 0.001992788641267971, "loss": 0.4287, "step": 4832 }, { "epoch": 0.008571081553823254, "grad_norm": 1.0234375, "learning_rate": 0.001992781119907772, "loss": 0.3503, "step": 4834 }, { "epoch": 0.008574627719133069, "grad_norm": 0.38671875, "learning_rate": 0.0019927735946430668, "loss": 0.22, "step": 4836 }, { "epoch": 0.008578173884442885, "grad_norm": 0.421875, "learning_rate": 0.0019927660654738884, "loss": 0.2637, "step": 4838 }, { "epoch": 0.0085817200497527, "grad_norm": 0.40234375, "learning_rate": 0.00199275853240027, "loss": 0.3468, "step": 4840 }, { "epoch": 0.008585266215062514, "grad_norm": 0.53125, "learning_rate": 0.001992750995422244, "loss": 0.3158, "step": 4842 }, { "epoch": 0.00858881238037233, "grad_norm": 2.578125, "learning_rate": 0.001992743454539844, "loss": 0.2404, "step": 4844 }, { "epoch": 0.008592358545682145, "grad_norm": 0.470703125, "learning_rate": 0.001992735909753103, "loss": 0.3253, "step": 4846 }, { "epoch": 0.00859590471099196, "grad_norm": 1.2578125, "learning_rate": 0.001992728361062053, "loss": 0.2342, "step": 4848 }, { "epoch": 0.008599450876301776, "grad_norm": 0.640625, "learning_rate": 0.001992720808466728, "loss": 0.2632, "step": 4850 }, { "epoch": 0.00860299704161159, "grad_norm": 0.515625, "learning_rate": 0.001992713251967161, "loss": 0.2621, "step": 4852 }, { "epoch": 0.008606543206921405, "grad_norm": 2.96875, "learning_rate": 0.0019927056915633842, "loss": 0.3666, "step": 4854 }, { "epoch": 0.008610089372231221, "grad_norm": 1.5703125, "learning_rate": 0.0019926981272554317, "loss": 0.4931, "step": 4856 }, { "epoch": 0.008613635537541036, "grad_norm": 0.33984375, "learning_rate": 0.001992690559043336, "loss": 0.2825, "step": 4858 }, { "epoch": 0.00861718170285085, "grad_norm": 0.29296875, "learning_rate": 0.0019926829869271307, "loss": 0.2756, "step": 4860 }, { "epoch": 0.008620727868160667, "grad_norm": 2.09375, "learning_rate": 0.0019926754109068482, "loss": 0.273, "step": 4862 }, { "epoch": 0.008624274033470481, "grad_norm": 0.345703125, "learning_rate": 0.001992667830982522, "loss": 0.2479, "step": 4864 }, { "epoch": 0.008627820198780296, "grad_norm": 1.0703125, "learning_rate": 0.001992660247154185, "loss": 0.2527, "step": 4866 }, { "epoch": 0.008631366364090112, "grad_norm": 0.373046875, "learning_rate": 0.001992652659421871, "loss": 0.2085, "step": 4868 }, { "epoch": 0.008634912529399926, "grad_norm": 1.0703125, "learning_rate": 0.0019926450677856125, "loss": 0.4311, "step": 4870 }, { "epoch": 0.008638458694709743, "grad_norm": 0.609375, "learning_rate": 0.001992637472245443, "loss": 0.4236, "step": 4872 }, { "epoch": 0.008642004860019557, "grad_norm": 0.384765625, "learning_rate": 0.001992629872801396, "loss": 0.2683, "step": 4874 }, { "epoch": 0.008645551025329372, "grad_norm": 4.21875, "learning_rate": 0.001992622269453504, "loss": 0.3981, "step": 4876 }, { "epoch": 0.008649097190639188, "grad_norm": 1.0859375, "learning_rate": 0.001992614662201801, "loss": 0.2772, "step": 4878 }, { "epoch": 0.008652643355949003, "grad_norm": 0.2138671875, "learning_rate": 0.00199260705104632, "loss": 0.2528, "step": 4880 }, { "epoch": 0.008656189521258817, "grad_norm": 0.4296875, "learning_rate": 0.001992599435987094, "loss": 0.2588, "step": 4882 }, { "epoch": 0.008659735686568634, "grad_norm": 0.69140625, "learning_rate": 0.0019925918170241573, "loss": 0.2687, "step": 4884 }, { "epoch": 0.008663281851878448, "grad_norm": 0.92578125, "learning_rate": 0.0019925841941575415, "loss": 0.297, "step": 4886 }, { "epoch": 0.008666828017188263, "grad_norm": 0.349609375, "learning_rate": 0.001992576567387281, "loss": 0.2267, "step": 4888 }, { "epoch": 0.008670374182498079, "grad_norm": 0.287109375, "learning_rate": 0.0019925689367134096, "loss": 0.2558, "step": 4890 }, { "epoch": 0.008673920347807893, "grad_norm": 0.330078125, "learning_rate": 0.00199256130213596, "loss": 0.3102, "step": 4892 }, { "epoch": 0.008677466513117708, "grad_norm": 2.734375, "learning_rate": 0.0019925536636549654, "loss": 0.3242, "step": 4894 }, { "epoch": 0.008681012678427524, "grad_norm": 2.796875, "learning_rate": 0.0019925460212704598, "loss": 0.4111, "step": 4896 }, { "epoch": 0.008684558843737339, "grad_norm": 0.53125, "learning_rate": 0.0019925383749824764, "loss": 0.3217, "step": 4898 }, { "epoch": 0.008688105009047153, "grad_norm": 0.9609375, "learning_rate": 0.001992530724791048, "loss": 0.2641, "step": 4900 }, { "epoch": 0.00869165117435697, "grad_norm": 0.625, "learning_rate": 0.0019925230706962093, "loss": 0.3279, "step": 4902 }, { "epoch": 0.008695197339666784, "grad_norm": 0.21875, "learning_rate": 0.0019925154126979932, "loss": 0.2284, "step": 4904 }, { "epoch": 0.0086987435049766, "grad_norm": 2.6875, "learning_rate": 0.0019925077507964325, "loss": 0.5094, "step": 4906 }, { "epoch": 0.008702289670286415, "grad_norm": 0.49609375, "learning_rate": 0.001992500084991562, "loss": 0.2678, "step": 4908 }, { "epoch": 0.00870583583559623, "grad_norm": 0.318359375, "learning_rate": 0.001992492415283414, "loss": 0.2485, "step": 4910 }, { "epoch": 0.008709382000906046, "grad_norm": 1.1484375, "learning_rate": 0.001992484741672023, "loss": 0.2542, "step": 4912 }, { "epoch": 0.00871292816621586, "grad_norm": 0.29296875, "learning_rate": 0.0019924770641574223, "loss": 0.2881, "step": 4914 }, { "epoch": 0.008716474331525675, "grad_norm": 0.45703125, "learning_rate": 0.001992469382739645, "loss": 0.305, "step": 4916 }, { "epoch": 0.008720020496835491, "grad_norm": 0.30078125, "learning_rate": 0.0019924616974187253, "loss": 0.2519, "step": 4918 }, { "epoch": 0.008723566662145306, "grad_norm": 0.4609375, "learning_rate": 0.0019924540081946956, "loss": 0.3043, "step": 4920 }, { "epoch": 0.00872711282745512, "grad_norm": 0.859375, "learning_rate": 0.0019924463150675915, "loss": 0.2178, "step": 4922 }, { "epoch": 0.008730658992764937, "grad_norm": 0.392578125, "learning_rate": 0.001992438618037445, "loss": 0.2876, "step": 4924 }, { "epoch": 0.008734205158074751, "grad_norm": 0.91015625, "learning_rate": 0.001992430917104291, "loss": 0.2921, "step": 4926 }, { "epoch": 0.008737751323384566, "grad_norm": 8.375, "learning_rate": 0.0019924232122681624, "loss": 0.3517, "step": 4928 }, { "epoch": 0.008741297488694382, "grad_norm": 0.59375, "learning_rate": 0.0019924155035290925, "loss": 0.2663, "step": 4930 }, { "epoch": 0.008744843654004197, "grad_norm": 2.828125, "learning_rate": 0.001992407790887116, "loss": 0.2653, "step": 4932 }, { "epoch": 0.008748389819314011, "grad_norm": 0.59765625, "learning_rate": 0.001992400074342266, "loss": 0.3318, "step": 4934 }, { "epoch": 0.008751935984623828, "grad_norm": 0.4609375, "learning_rate": 0.0019923923538945764, "loss": 0.242, "step": 4936 }, { "epoch": 0.008755482149933642, "grad_norm": 0.33203125, "learning_rate": 0.001992384629544081, "loss": 0.2559, "step": 4938 }, { "epoch": 0.008759028315243458, "grad_norm": 0.671875, "learning_rate": 0.0019923769012908142, "loss": 0.2524, "step": 4940 }, { "epoch": 0.008762574480553273, "grad_norm": 0.61328125, "learning_rate": 0.001992369169134809, "loss": 0.297, "step": 4942 }, { "epoch": 0.008766120645863087, "grad_norm": 0.50390625, "learning_rate": 0.0019923614330760986, "loss": 0.3369, "step": 4944 }, { "epoch": 0.008769666811172904, "grad_norm": 1.0078125, "learning_rate": 0.001992353693114719, "loss": 0.2508, "step": 4946 }, { "epoch": 0.008773212976482718, "grad_norm": 1.96875, "learning_rate": 0.0019923459492507014, "loss": 0.4605, "step": 4948 }, { "epoch": 0.008776759141792533, "grad_norm": 0.72265625, "learning_rate": 0.0019923382014840818, "loss": 0.2632, "step": 4950 }, { "epoch": 0.00878030530710235, "grad_norm": 0.306640625, "learning_rate": 0.001992330449814893, "loss": 0.2353, "step": 4952 }, { "epoch": 0.008783851472412164, "grad_norm": 0.302734375, "learning_rate": 0.0019923226942431685, "loss": 0.197, "step": 4954 }, { "epoch": 0.008787397637721978, "grad_norm": 0.3203125, "learning_rate": 0.0019923149347689435, "loss": 0.2633, "step": 4956 }, { "epoch": 0.008790943803031795, "grad_norm": 0.478515625, "learning_rate": 0.001992307171392251, "loss": 0.237, "step": 4958 }, { "epoch": 0.008794489968341609, "grad_norm": 0.9765625, "learning_rate": 0.0019922994041131257, "loss": 0.3999, "step": 4960 }, { "epoch": 0.008798036133651424, "grad_norm": 0.431640625, "learning_rate": 0.0019922916329316006, "loss": 0.2326, "step": 4962 }, { "epoch": 0.00880158229896124, "grad_norm": 0.79296875, "learning_rate": 0.0019922838578477105, "loss": 0.3141, "step": 4964 }, { "epoch": 0.008805128464271055, "grad_norm": 0.5234375, "learning_rate": 0.0019922760788614892, "loss": 0.2723, "step": 4966 }, { "epoch": 0.008808674629580869, "grad_norm": 1.2265625, "learning_rate": 0.0019922682959729703, "loss": 0.2961, "step": 4968 }, { "epoch": 0.008812220794890685, "grad_norm": 0.60546875, "learning_rate": 0.001992260509182188, "loss": 0.3318, "step": 4970 }, { "epoch": 0.0088157669602005, "grad_norm": 0.6328125, "learning_rate": 0.0019922527184891774, "loss": 0.253, "step": 4972 }, { "epoch": 0.008819313125510316, "grad_norm": 0.265625, "learning_rate": 0.0019922449238939707, "loss": 0.2028, "step": 4974 }, { "epoch": 0.00882285929082013, "grad_norm": 2.640625, "learning_rate": 0.0019922371253966037, "loss": 0.3068, "step": 4976 }, { "epoch": 0.008826405456129945, "grad_norm": 0.37890625, "learning_rate": 0.0019922293229971094, "loss": 0.3086, "step": 4978 }, { "epoch": 0.008829951621439762, "grad_norm": 3.3125, "learning_rate": 0.0019922215166955225, "loss": 0.4288, "step": 4980 }, { "epoch": 0.008833497786749576, "grad_norm": 0.28515625, "learning_rate": 0.0019922137064918768, "loss": 0.2756, "step": 4982 }, { "epoch": 0.00883704395205939, "grad_norm": 0.373046875, "learning_rate": 0.001992205892386207, "loss": 0.222, "step": 4984 }, { "epoch": 0.008840590117369207, "grad_norm": 0.39453125, "learning_rate": 0.001992198074378546, "loss": 0.1995, "step": 4986 }, { "epoch": 0.008844136282679022, "grad_norm": 1.453125, "learning_rate": 0.00199219025246893, "loss": 0.4126, "step": 4988 }, { "epoch": 0.008847682447988836, "grad_norm": 0.419921875, "learning_rate": 0.001992182426657391, "loss": 0.2793, "step": 4990 }, { "epoch": 0.008851228613298652, "grad_norm": 0.66796875, "learning_rate": 0.001992174596943965, "loss": 0.214, "step": 4992 }, { "epoch": 0.008854774778608467, "grad_norm": 0.498046875, "learning_rate": 0.0019921667633286855, "loss": 0.2751, "step": 4994 }, { "epoch": 0.008858320943918281, "grad_norm": 0.28515625, "learning_rate": 0.0019921589258115866, "loss": 0.2328, "step": 4996 }, { "epoch": 0.008861867109228098, "grad_norm": 0.2470703125, "learning_rate": 0.001992151084392703, "loss": 0.2115, "step": 4998 }, { "epoch": 0.008865413274537912, "grad_norm": 1.78125, "learning_rate": 0.0019921432390720686, "loss": 0.355, "step": 5000 }, { "epoch": 0.008868959439847727, "grad_norm": 0.2265625, "learning_rate": 0.001992135389849718, "loss": 0.2089, "step": 5002 }, { "epoch": 0.008872505605157543, "grad_norm": 0.55859375, "learning_rate": 0.001992127536725685, "loss": 0.2244, "step": 5004 }, { "epoch": 0.008876051770467358, "grad_norm": 0.40625, "learning_rate": 0.0019921196797000047, "loss": 0.3244, "step": 5006 }, { "epoch": 0.008879597935777174, "grad_norm": 0.46484375, "learning_rate": 0.0019921118187727115, "loss": 0.2634, "step": 5008 }, { "epoch": 0.008883144101086989, "grad_norm": 0.2158203125, "learning_rate": 0.001992103953943839, "loss": 0.2841, "step": 5010 }, { "epoch": 0.008886690266396803, "grad_norm": 0.328125, "learning_rate": 0.001992096085213422, "loss": 0.2345, "step": 5012 }, { "epoch": 0.00889023643170662, "grad_norm": 0.310546875, "learning_rate": 0.001992088212581495, "loss": 0.3092, "step": 5014 }, { "epoch": 0.008893782597016434, "grad_norm": 0.36328125, "learning_rate": 0.0019920803360480924, "loss": 0.2867, "step": 5016 }, { "epoch": 0.008897328762326248, "grad_norm": 0.423828125, "learning_rate": 0.001992072455613248, "loss": 0.2624, "step": 5018 }, { "epoch": 0.008900874927636065, "grad_norm": 0.392578125, "learning_rate": 0.001992064571276998, "loss": 0.278, "step": 5020 }, { "epoch": 0.00890442109294588, "grad_norm": 5.8125, "learning_rate": 0.001992056683039375, "loss": 0.5674, "step": 5022 }, { "epoch": 0.008907967258255694, "grad_norm": 0.69921875, "learning_rate": 0.0019920487909004143, "loss": 0.3603, "step": 5024 }, { "epoch": 0.00891151342356551, "grad_norm": 0.61328125, "learning_rate": 0.0019920408948601504, "loss": 0.2735, "step": 5026 }, { "epoch": 0.008915059588875325, "grad_norm": 0.37890625, "learning_rate": 0.0019920329949186175, "loss": 0.2716, "step": 5028 }, { "epoch": 0.00891860575418514, "grad_norm": 0.953125, "learning_rate": 0.0019920250910758506, "loss": 0.2975, "step": 5030 }, { "epoch": 0.008922151919494956, "grad_norm": 0.48046875, "learning_rate": 0.001992017183331884, "loss": 0.2184, "step": 5032 }, { "epoch": 0.00892569808480477, "grad_norm": 0.546875, "learning_rate": 0.001992009271686753, "loss": 0.2205, "step": 5034 }, { "epoch": 0.008929244250114585, "grad_norm": 0.62109375, "learning_rate": 0.001992001356140491, "loss": 0.2986, "step": 5036 }, { "epoch": 0.008932790415424401, "grad_norm": 0.30078125, "learning_rate": 0.0019919934366931335, "loss": 0.3347, "step": 5038 }, { "epoch": 0.008936336580734216, "grad_norm": 0.30078125, "learning_rate": 0.0019919855133447148, "loss": 0.2387, "step": 5040 }, { "epoch": 0.008939882746044032, "grad_norm": 0.404296875, "learning_rate": 0.0019919775860952693, "loss": 0.3003, "step": 5042 }, { "epoch": 0.008943428911353846, "grad_norm": 0.5234375, "learning_rate": 0.001991969654944832, "loss": 0.2495, "step": 5044 }, { "epoch": 0.008946975076663661, "grad_norm": 0.423828125, "learning_rate": 0.001991961719893438, "loss": 0.2789, "step": 5046 }, { "epoch": 0.008950521241973477, "grad_norm": 0.40234375, "learning_rate": 0.001991953780941121, "loss": 0.2735, "step": 5048 }, { "epoch": 0.008954067407283292, "grad_norm": 0.474609375, "learning_rate": 0.001991945838087917, "loss": 0.3177, "step": 5050 }, { "epoch": 0.008957613572593106, "grad_norm": 0.259765625, "learning_rate": 0.001991937891333859, "loss": 0.2191, "step": 5052 }, { "epoch": 0.008961159737902923, "grad_norm": 0.421875, "learning_rate": 0.001991929940678984, "loss": 0.2816, "step": 5054 }, { "epoch": 0.008964705903212737, "grad_norm": 0.62890625, "learning_rate": 0.0019919219861233243, "loss": 0.4708, "step": 5056 }, { "epoch": 0.008968252068522552, "grad_norm": 0.5546875, "learning_rate": 0.0019919140276669165, "loss": 0.325, "step": 5058 }, { "epoch": 0.008971798233832368, "grad_norm": 0.6953125, "learning_rate": 0.001991906065309795, "loss": 0.2673, "step": 5060 }, { "epoch": 0.008975344399142183, "grad_norm": 0.404296875, "learning_rate": 0.0019918980990519942, "loss": 0.2394, "step": 5062 }, { "epoch": 0.008978890564451997, "grad_norm": 1.2734375, "learning_rate": 0.001991890128893549, "loss": 0.3693, "step": 5064 }, { "epoch": 0.008982436729761813, "grad_norm": 0.44140625, "learning_rate": 0.0019918821548344946, "loss": 0.265, "step": 5066 }, { "epoch": 0.008985982895071628, "grad_norm": 1.140625, "learning_rate": 0.0019918741768748657, "loss": 0.2911, "step": 5068 }, { "epoch": 0.008989529060381442, "grad_norm": 0.703125, "learning_rate": 0.0019918661950146977, "loss": 0.2495, "step": 5070 }, { "epoch": 0.008993075225691259, "grad_norm": 0.57421875, "learning_rate": 0.0019918582092540247, "loss": 0.2729, "step": 5072 }, { "epoch": 0.008996621391001073, "grad_norm": 0.443359375, "learning_rate": 0.0019918502195928815, "loss": 0.2481, "step": 5074 }, { "epoch": 0.00900016755631089, "grad_norm": 0.400390625, "learning_rate": 0.001991842226031304, "loss": 0.2532, "step": 5076 }, { "epoch": 0.009003713721620704, "grad_norm": 0.515625, "learning_rate": 0.0019918342285693267, "loss": 0.4735, "step": 5078 }, { "epoch": 0.009007259886930519, "grad_norm": 0.33984375, "learning_rate": 0.001991826227206984, "loss": 0.1499, "step": 5080 }, { "epoch": 0.009010806052240335, "grad_norm": 1.453125, "learning_rate": 0.001991818221944312, "loss": 0.4161, "step": 5082 }, { "epoch": 0.00901435221755015, "grad_norm": 0.486328125, "learning_rate": 0.0019918102127813447, "loss": 0.3267, "step": 5084 }, { "epoch": 0.009017898382859964, "grad_norm": 0.41015625, "learning_rate": 0.001991802199718118, "loss": 0.3666, "step": 5086 }, { "epoch": 0.00902144454816978, "grad_norm": 0.4296875, "learning_rate": 0.0019917941827546663, "loss": 0.2611, "step": 5088 }, { "epoch": 0.009024990713479595, "grad_norm": 0.44921875, "learning_rate": 0.0019917861618910246, "loss": 0.2883, "step": 5090 }, { "epoch": 0.00902853687878941, "grad_norm": 1.421875, "learning_rate": 0.0019917781371272284, "loss": 0.4393, "step": 5092 }, { "epoch": 0.009032083044099226, "grad_norm": 0.216796875, "learning_rate": 0.001991770108463313, "loss": 0.2813, "step": 5094 }, { "epoch": 0.00903562920940904, "grad_norm": 0.4921875, "learning_rate": 0.0019917620758993127, "loss": 0.2903, "step": 5096 }, { "epoch": 0.009039175374718855, "grad_norm": 0.734375, "learning_rate": 0.0019917540394352633, "loss": 0.34, "step": 5098 }, { "epoch": 0.009042721540028671, "grad_norm": 0.5625, "learning_rate": 0.0019917459990711995, "loss": 0.2248, "step": 5100 }, { "epoch": 0.009046267705338486, "grad_norm": 0.279296875, "learning_rate": 0.001991737954807157, "loss": 0.3489, "step": 5102 }, { "epoch": 0.0090498138706483, "grad_norm": 1.03125, "learning_rate": 0.0019917299066431705, "loss": 0.1884, "step": 5104 }, { "epoch": 0.009053360035958117, "grad_norm": 0.609375, "learning_rate": 0.001991721854579275, "loss": 0.234, "step": 5106 }, { "epoch": 0.009056906201267931, "grad_norm": 0.46484375, "learning_rate": 0.0019917137986155066, "loss": 0.3182, "step": 5108 }, { "epoch": 0.009060452366577747, "grad_norm": 1.515625, "learning_rate": 0.0019917057387519, "loss": 0.4011, "step": 5110 }, { "epoch": 0.009063998531887562, "grad_norm": 0.294921875, "learning_rate": 0.0019916976749884898, "loss": 0.3163, "step": 5112 }, { "epoch": 0.009067544697197377, "grad_norm": 0.478515625, "learning_rate": 0.001991689607325313, "loss": 0.3905, "step": 5114 }, { "epoch": 0.009071090862507193, "grad_norm": 0.765625, "learning_rate": 0.001991681535762403, "loss": 0.2781, "step": 5116 }, { "epoch": 0.009074637027817007, "grad_norm": 0.37109375, "learning_rate": 0.0019916734602997963, "loss": 0.3354, "step": 5118 }, { "epoch": 0.009078183193126822, "grad_norm": 0.357421875, "learning_rate": 0.0019916653809375273, "loss": 0.2352, "step": 5120 }, { "epoch": 0.009081729358436638, "grad_norm": 0.63671875, "learning_rate": 0.0019916572976756324, "loss": 0.2426, "step": 5122 }, { "epoch": 0.009085275523746453, "grad_norm": 1.484375, "learning_rate": 0.0019916492105141463, "loss": 0.4047, "step": 5124 }, { "epoch": 0.009088821689056267, "grad_norm": 0.53515625, "learning_rate": 0.0019916411194531043, "loss": 0.2283, "step": 5126 }, { "epoch": 0.009092367854366084, "grad_norm": 0.953125, "learning_rate": 0.001991633024492542, "loss": 0.3103, "step": 5128 }, { "epoch": 0.009095914019675898, "grad_norm": 0.796875, "learning_rate": 0.001991624925632495, "loss": 0.2451, "step": 5130 }, { "epoch": 0.009099460184985713, "grad_norm": 0.416015625, "learning_rate": 0.0019916168228729983, "loss": 0.3287, "step": 5132 }, { "epoch": 0.009103006350295529, "grad_norm": 2.125, "learning_rate": 0.001991608716214088, "loss": 0.3232, "step": 5134 }, { "epoch": 0.009106552515605344, "grad_norm": 0.69140625, "learning_rate": 0.0019916006056557986, "loss": 0.1628, "step": 5136 }, { "epoch": 0.009110098680915158, "grad_norm": 0.5703125, "learning_rate": 0.001991592491198166, "loss": 0.3016, "step": 5138 }, { "epoch": 0.009113644846224974, "grad_norm": 0.470703125, "learning_rate": 0.001991584372841226, "loss": 0.2512, "step": 5140 }, { "epoch": 0.009117191011534789, "grad_norm": 6.65625, "learning_rate": 0.001991576250585014, "loss": 0.3315, "step": 5142 }, { "epoch": 0.009120737176844605, "grad_norm": 0.578125, "learning_rate": 0.0019915681244295647, "loss": 0.2593, "step": 5144 }, { "epoch": 0.00912428334215442, "grad_norm": 1.2421875, "learning_rate": 0.001991559994374915, "loss": 0.4136, "step": 5146 }, { "epoch": 0.009127829507464234, "grad_norm": 0.30859375, "learning_rate": 0.001991551860421099, "loss": 0.2256, "step": 5148 }, { "epoch": 0.00913137567277405, "grad_norm": 0.5546875, "learning_rate": 0.001991543722568154, "loss": 0.2684, "step": 5150 }, { "epoch": 0.009134921838083865, "grad_norm": 0.6640625, "learning_rate": 0.001991535580816114, "loss": 0.2399, "step": 5152 }, { "epoch": 0.00913846800339368, "grad_norm": 1.171875, "learning_rate": 0.001991527435165015, "loss": 0.2677, "step": 5154 }, { "epoch": 0.009142014168703496, "grad_norm": 2.421875, "learning_rate": 0.0019915192856148935, "loss": 0.4014, "step": 5156 }, { "epoch": 0.00914556033401331, "grad_norm": 0.4765625, "learning_rate": 0.0019915111321657845, "loss": 0.2408, "step": 5158 }, { "epoch": 0.009149106499323125, "grad_norm": 0.451171875, "learning_rate": 0.0019915029748177235, "loss": 0.5697, "step": 5160 }, { "epoch": 0.009152652664632941, "grad_norm": 1.03125, "learning_rate": 0.001991494813570746, "loss": 0.3591, "step": 5162 }, { "epoch": 0.009156198829942756, "grad_norm": 1.7265625, "learning_rate": 0.001991486648424888, "loss": 0.3965, "step": 5164 }, { "epoch": 0.00915974499525257, "grad_norm": 0.41796875, "learning_rate": 0.001991478479380186, "loss": 0.227, "step": 5166 }, { "epoch": 0.009163291160562387, "grad_norm": 0.484375, "learning_rate": 0.001991470306436674, "loss": 0.2379, "step": 5168 }, { "epoch": 0.009166837325872201, "grad_norm": 2.15625, "learning_rate": 0.0019914621295943893, "loss": 0.373, "step": 5170 }, { "epoch": 0.009170383491182016, "grad_norm": 0.4296875, "learning_rate": 0.001991453948853367, "loss": 0.2354, "step": 5172 }, { "epoch": 0.009173929656491832, "grad_norm": 0.70703125, "learning_rate": 0.001991445764213643, "loss": 0.4654, "step": 5174 }, { "epoch": 0.009177475821801647, "grad_norm": 0.2734375, "learning_rate": 0.001991437575675253, "loss": 0.2566, "step": 5176 }, { "epoch": 0.009181021987111463, "grad_norm": 0.359375, "learning_rate": 0.0019914293832382327, "loss": 0.2169, "step": 5178 }, { "epoch": 0.009184568152421278, "grad_norm": 0.98046875, "learning_rate": 0.001991421186902618, "loss": 0.2468, "step": 5180 }, { "epoch": 0.009188114317731092, "grad_norm": 0.2734375, "learning_rate": 0.001991412986668445, "loss": 0.3031, "step": 5182 }, { "epoch": 0.009191660483040908, "grad_norm": 0.58203125, "learning_rate": 0.0019914047825357493, "loss": 0.3574, "step": 5184 }, { "epoch": 0.009195206648350723, "grad_norm": 0.69140625, "learning_rate": 0.001991396574504567, "loss": 0.2387, "step": 5186 }, { "epoch": 0.009198752813660538, "grad_norm": 0.484375, "learning_rate": 0.0019913883625749342, "loss": 0.2928, "step": 5188 }, { "epoch": 0.009202298978970354, "grad_norm": 1.578125, "learning_rate": 0.0019913801467468855, "loss": 0.4186, "step": 5190 }, { "epoch": 0.009205845144280168, "grad_norm": 0.244140625, "learning_rate": 0.0019913719270204587, "loss": 0.2622, "step": 5192 }, { "epoch": 0.009209391309589983, "grad_norm": 0.328125, "learning_rate": 0.001991363703395689, "loss": 0.2574, "step": 5194 }, { "epoch": 0.0092129374748998, "grad_norm": 0.5078125, "learning_rate": 0.0019913554758726115, "loss": 0.289, "step": 5196 }, { "epoch": 0.009216483640209614, "grad_norm": 1.203125, "learning_rate": 0.001991347244451263, "loss": 0.2643, "step": 5198 }, { "epoch": 0.009220029805519428, "grad_norm": 0.28125, "learning_rate": 0.0019913390091316797, "loss": 0.2406, "step": 5200 }, { "epoch": 0.009223575970829245, "grad_norm": 0.8984375, "learning_rate": 0.0019913307699138973, "loss": 0.4134, "step": 5202 }, { "epoch": 0.00922712213613906, "grad_norm": 0.34375, "learning_rate": 0.001991322526797952, "loss": 0.2647, "step": 5204 }, { "epoch": 0.009230668301448874, "grad_norm": 0.41796875, "learning_rate": 0.0019913142797838793, "loss": 0.2725, "step": 5206 }, { "epoch": 0.00923421446675869, "grad_norm": 0.6953125, "learning_rate": 0.0019913060288717158, "loss": 0.2381, "step": 5208 }, { "epoch": 0.009237760632068505, "grad_norm": 0.7265625, "learning_rate": 0.0019912977740614976, "loss": 0.2676, "step": 5210 }, { "epoch": 0.00924130679737832, "grad_norm": 0.5078125, "learning_rate": 0.0019912895153532608, "loss": 0.3104, "step": 5212 }, { "epoch": 0.009244852962688135, "grad_norm": 0.37109375, "learning_rate": 0.001991281252747041, "loss": 0.3894, "step": 5214 }, { "epoch": 0.00924839912799795, "grad_norm": 1.0546875, "learning_rate": 0.001991272986242875, "loss": 0.297, "step": 5216 }, { "epoch": 0.009251945293307766, "grad_norm": 1.6484375, "learning_rate": 0.0019912647158407985, "loss": 0.3694, "step": 5218 }, { "epoch": 0.00925549145861758, "grad_norm": 0.318359375, "learning_rate": 0.001991256441540848, "loss": 0.3923, "step": 5220 }, { "epoch": 0.009259037623927395, "grad_norm": 0.2099609375, "learning_rate": 0.0019912481633430593, "loss": 0.2637, "step": 5222 }, { "epoch": 0.009262583789237212, "grad_norm": 2.359375, "learning_rate": 0.001991239881247469, "loss": 0.3471, "step": 5224 }, { "epoch": 0.009266129954547026, "grad_norm": 0.91015625, "learning_rate": 0.0019912315952541134, "loss": 0.3024, "step": 5226 }, { "epoch": 0.00926967611985684, "grad_norm": 0.373046875, "learning_rate": 0.001991223305363028, "loss": 0.2688, "step": 5228 }, { "epoch": 0.009273222285166657, "grad_norm": 0.2265625, "learning_rate": 0.00199121501157425, "loss": 0.298, "step": 5230 }, { "epoch": 0.009276768450476472, "grad_norm": 0.53125, "learning_rate": 0.001991206713887815, "loss": 0.3344, "step": 5232 }, { "epoch": 0.009280314615786286, "grad_norm": 1.1640625, "learning_rate": 0.0019911984123037593, "loss": 0.3017, "step": 5234 }, { "epoch": 0.009283860781096102, "grad_norm": 0.376953125, "learning_rate": 0.00199119010682212, "loss": 0.3407, "step": 5236 }, { "epoch": 0.009287406946405917, "grad_norm": 0.2890625, "learning_rate": 0.0019911817974429323, "loss": 0.2423, "step": 5238 }, { "epoch": 0.009290953111715732, "grad_norm": 0.33984375, "learning_rate": 0.001991173484166233, "loss": 0.2364, "step": 5240 }, { "epoch": 0.009294499277025548, "grad_norm": 1.390625, "learning_rate": 0.001991165166992059, "loss": 0.3402, "step": 5242 }, { "epoch": 0.009298045442335362, "grad_norm": 0.267578125, "learning_rate": 0.0019911568459204457, "loss": 0.2299, "step": 5244 }, { "epoch": 0.009301591607645179, "grad_norm": 0.421875, "learning_rate": 0.0019911485209514303, "loss": 0.3193, "step": 5246 }, { "epoch": 0.009305137772954993, "grad_norm": 0.376953125, "learning_rate": 0.001991140192085049, "loss": 0.2265, "step": 5248 }, { "epoch": 0.009308683938264808, "grad_norm": 0.37109375, "learning_rate": 0.0019911318593213378, "loss": 0.2498, "step": 5250 }, { "epoch": 0.009312230103574624, "grad_norm": 0.6171875, "learning_rate": 0.0019911235226603343, "loss": 0.2617, "step": 5252 }, { "epoch": 0.009315776268884439, "grad_norm": 0.58984375, "learning_rate": 0.0019911151821020733, "loss": 0.3511, "step": 5254 }, { "epoch": 0.009319322434194253, "grad_norm": 0.55859375, "learning_rate": 0.001991106837646592, "loss": 0.3287, "step": 5256 }, { "epoch": 0.00932286859950407, "grad_norm": 0.279296875, "learning_rate": 0.0019910984892939276, "loss": 0.3323, "step": 5258 }, { "epoch": 0.009326414764813884, "grad_norm": 0.2294921875, "learning_rate": 0.0019910901370441157, "loss": 0.2309, "step": 5260 }, { "epoch": 0.009329960930123699, "grad_norm": 0.283203125, "learning_rate": 0.0019910817808971933, "loss": 0.3175, "step": 5262 }, { "epoch": 0.009333507095433515, "grad_norm": 0.8203125, "learning_rate": 0.001991073420853197, "loss": 0.2664, "step": 5264 }, { "epoch": 0.00933705326074333, "grad_norm": 0.3515625, "learning_rate": 0.0019910650569121627, "loss": 0.2869, "step": 5266 }, { "epoch": 0.009340599426053144, "grad_norm": 0.220703125, "learning_rate": 0.0019910566890741273, "loss": 0.2393, "step": 5268 }, { "epoch": 0.00934414559136296, "grad_norm": 0.267578125, "learning_rate": 0.001991048317339128, "loss": 0.2711, "step": 5270 }, { "epoch": 0.009347691756672775, "grad_norm": 0.671875, "learning_rate": 0.001991039941707201, "loss": 0.3396, "step": 5272 }, { "epoch": 0.00935123792198259, "grad_norm": 0.55859375, "learning_rate": 0.0019910315621783827, "loss": 0.2058, "step": 5274 }, { "epoch": 0.009354784087292406, "grad_norm": 1.0703125, "learning_rate": 0.00199102317875271, "loss": 0.396, "step": 5276 }, { "epoch": 0.00935833025260222, "grad_norm": 1.046875, "learning_rate": 0.001991014791430219, "loss": 0.2666, "step": 5278 }, { "epoch": 0.009361876417912036, "grad_norm": 0.57421875, "learning_rate": 0.0019910064002109473, "loss": 0.2908, "step": 5280 }, { "epoch": 0.009365422583221851, "grad_norm": 1.5234375, "learning_rate": 0.001990998005094931, "loss": 0.3964, "step": 5282 }, { "epoch": 0.009368968748531666, "grad_norm": 2.296875, "learning_rate": 0.0019909896060822073, "loss": 0.3696, "step": 5284 }, { "epoch": 0.009372514913841482, "grad_norm": 0.3671875, "learning_rate": 0.001990981203172812, "loss": 0.264, "step": 5286 }, { "epoch": 0.009376061079151296, "grad_norm": 0.3515625, "learning_rate": 0.001990972796366783, "loss": 0.2277, "step": 5288 }, { "epoch": 0.009379607244461111, "grad_norm": 0.59765625, "learning_rate": 0.0019909643856641564, "loss": 0.2843, "step": 5290 }, { "epoch": 0.009383153409770927, "grad_norm": 0.314453125, "learning_rate": 0.0019909559710649693, "loss": 0.2127, "step": 5292 }, { "epoch": 0.009386699575080742, "grad_norm": 0.4140625, "learning_rate": 0.0019909475525692576, "loss": 0.2601, "step": 5294 }, { "epoch": 0.009390245740390556, "grad_norm": 0.298828125, "learning_rate": 0.00199093913017706, "loss": 0.2017, "step": 5296 }, { "epoch": 0.009393791905700373, "grad_norm": 0.34375, "learning_rate": 0.0019909307038884112, "loss": 0.2771, "step": 5298 }, { "epoch": 0.009397338071010187, "grad_norm": 0.3671875, "learning_rate": 0.001990922273703349, "loss": 0.1868, "step": 5300 }, { "epoch": 0.009400884236320002, "grad_norm": 0.341796875, "learning_rate": 0.001990913839621911, "loss": 0.3177, "step": 5302 }, { "epoch": 0.009404430401629818, "grad_norm": 0.28125, "learning_rate": 0.0019909054016441327, "loss": 0.2739, "step": 5304 }, { "epoch": 0.009407976566939633, "grad_norm": 0.921875, "learning_rate": 0.001990896959770052, "loss": 0.2774, "step": 5306 }, { "epoch": 0.009411522732249447, "grad_norm": 2.9375, "learning_rate": 0.0019908885139997053, "loss": 0.3488, "step": 5308 }, { "epoch": 0.009415068897559263, "grad_norm": 6.625, "learning_rate": 0.00199088006433313, "loss": 0.5172, "step": 5310 }, { "epoch": 0.009418615062869078, "grad_norm": 0.419921875, "learning_rate": 0.0019908716107703626, "loss": 0.2794, "step": 5312 }, { "epoch": 0.009422161228178894, "grad_norm": 1.3203125, "learning_rate": 0.00199086315331144, "loss": 0.3641, "step": 5314 }, { "epoch": 0.009425707393488709, "grad_norm": 0.54296875, "learning_rate": 0.0019908546919564, "loss": 0.2582, "step": 5316 }, { "epoch": 0.009429253558798523, "grad_norm": 0.380859375, "learning_rate": 0.001990846226705279, "loss": 0.3349, "step": 5318 }, { "epoch": 0.00943279972410834, "grad_norm": 1.1640625, "learning_rate": 0.001990837757558114, "loss": 0.2865, "step": 5320 }, { "epoch": 0.009436345889418154, "grad_norm": 0.486328125, "learning_rate": 0.0019908292845149415, "loss": 0.2807, "step": 5322 }, { "epoch": 0.009439892054727969, "grad_norm": 0.7421875, "learning_rate": 0.0019908208075757996, "loss": 0.2805, "step": 5324 }, { "epoch": 0.009443438220037785, "grad_norm": 0.60546875, "learning_rate": 0.001990812326740725, "loss": 0.28, "step": 5326 }, { "epoch": 0.0094469843853476, "grad_norm": 4.875, "learning_rate": 0.001990803842009755, "loss": 0.3472, "step": 5328 }, { "epoch": 0.009450530550657414, "grad_norm": 0.31640625, "learning_rate": 0.001990795353382926, "loss": 0.2013, "step": 5330 }, { "epoch": 0.00945407671596723, "grad_norm": 0.37890625, "learning_rate": 0.0019907868608602755, "loss": 0.2834, "step": 5332 }, { "epoch": 0.009457622881277045, "grad_norm": 0.416015625, "learning_rate": 0.001990778364441841, "loss": 0.2338, "step": 5334 }, { "epoch": 0.00946116904658686, "grad_norm": 0.56640625, "learning_rate": 0.001990769864127659, "loss": 0.2797, "step": 5336 }, { "epoch": 0.009464715211896676, "grad_norm": 0.61328125, "learning_rate": 0.001990761359917767, "loss": 0.2832, "step": 5338 }, { "epoch": 0.00946826137720649, "grad_norm": 1.453125, "learning_rate": 0.001990752851812203, "loss": 0.3602, "step": 5340 }, { "epoch": 0.009471807542516305, "grad_norm": 0.59765625, "learning_rate": 0.0019907443398110027, "loss": 0.3086, "step": 5342 }, { "epoch": 0.009475353707826121, "grad_norm": 0.6953125, "learning_rate": 0.0019907358239142046, "loss": 0.2671, "step": 5344 }, { "epoch": 0.009478899873135936, "grad_norm": 0.4140625, "learning_rate": 0.001990727304121845, "loss": 0.2537, "step": 5346 }, { "epoch": 0.009482446038445752, "grad_norm": 1.84375, "learning_rate": 0.001990718780433962, "loss": 0.3107, "step": 5348 }, { "epoch": 0.009485992203755567, "grad_norm": 0.28515625, "learning_rate": 0.0019907102528505917, "loss": 0.2356, "step": 5350 }, { "epoch": 0.009489538369065381, "grad_norm": 0.71484375, "learning_rate": 0.0019907017213717727, "loss": 0.241, "step": 5352 }, { "epoch": 0.009493084534375197, "grad_norm": 0.9609375, "learning_rate": 0.0019906931859975416, "loss": 0.2512, "step": 5354 }, { "epoch": 0.009496630699685012, "grad_norm": 0.337890625, "learning_rate": 0.001990684646727936, "loss": 0.3035, "step": 5356 }, { "epoch": 0.009500176864994827, "grad_norm": 0.2421875, "learning_rate": 0.0019906761035629926, "loss": 0.2676, "step": 5358 }, { "epoch": 0.009503723030304643, "grad_norm": 0.28515625, "learning_rate": 0.00199066755650275, "loss": 0.2732, "step": 5360 }, { "epoch": 0.009507269195614457, "grad_norm": 0.890625, "learning_rate": 0.0019906590055472446, "loss": 0.3169, "step": 5362 }, { "epoch": 0.009510815360924272, "grad_norm": 0.259765625, "learning_rate": 0.001990650450696514, "loss": 0.2364, "step": 5364 }, { "epoch": 0.009514361526234088, "grad_norm": 1.6640625, "learning_rate": 0.001990641891950596, "loss": 0.2666, "step": 5366 }, { "epoch": 0.009517907691543903, "grad_norm": 2.109375, "learning_rate": 0.001990633329309527, "loss": 0.4231, "step": 5368 }, { "epoch": 0.009521453856853717, "grad_norm": 0.314453125, "learning_rate": 0.0019906247627733457, "loss": 0.2215, "step": 5370 }, { "epoch": 0.009525000022163534, "grad_norm": 6.75, "learning_rate": 0.001990616192342089, "loss": 0.4135, "step": 5372 }, { "epoch": 0.009528546187473348, "grad_norm": 0.55078125, "learning_rate": 0.0019906076180157945, "loss": 0.3483, "step": 5374 }, { "epoch": 0.009532092352783163, "grad_norm": 0.51171875, "learning_rate": 0.0019905990397944993, "loss": 0.3112, "step": 5376 }, { "epoch": 0.009535638518092979, "grad_norm": 0.67578125, "learning_rate": 0.001990590457678241, "loss": 0.3207, "step": 5378 }, { "epoch": 0.009539184683402794, "grad_norm": 0.80078125, "learning_rate": 0.001990581871667058, "loss": 0.2671, "step": 5380 }, { "epoch": 0.00954273084871261, "grad_norm": 0.45703125, "learning_rate": 0.0019905732817609868, "loss": 0.338, "step": 5382 }, { "epoch": 0.009546277014022424, "grad_norm": 0.5078125, "learning_rate": 0.0019905646879600654, "loss": 0.316, "step": 5384 }, { "epoch": 0.009549823179332239, "grad_norm": 0.7578125, "learning_rate": 0.0019905560902643317, "loss": 0.2966, "step": 5386 }, { "epoch": 0.009553369344642055, "grad_norm": 0.88671875, "learning_rate": 0.001990547488673823, "loss": 0.2656, "step": 5388 }, { "epoch": 0.00955691550995187, "grad_norm": 0.546875, "learning_rate": 0.001990538883188576, "loss": 0.2888, "step": 5390 }, { "epoch": 0.009560461675261684, "grad_norm": 0.498046875, "learning_rate": 0.00199053027380863, "loss": 0.3572, "step": 5392 }, { "epoch": 0.0095640078405715, "grad_norm": 1.515625, "learning_rate": 0.001990521660534022, "loss": 0.5049, "step": 5394 }, { "epoch": 0.009567554005881315, "grad_norm": 0.4140625, "learning_rate": 0.001990513043364789, "loss": 0.3315, "step": 5396 }, { "epoch": 0.00957110017119113, "grad_norm": 0.392578125, "learning_rate": 0.001990504422300969, "loss": 0.2469, "step": 5398 }, { "epoch": 0.009574646336500946, "grad_norm": 1.015625, "learning_rate": 0.0019904957973426005, "loss": 0.3216, "step": 5400 }, { "epoch": 0.00957819250181076, "grad_norm": 0.4921875, "learning_rate": 0.0019904871684897204, "loss": 0.3441, "step": 5402 }, { "epoch": 0.009581738667120575, "grad_norm": 0.373046875, "learning_rate": 0.001990478535742367, "loss": 0.3262, "step": 5404 }, { "epoch": 0.009585284832430391, "grad_norm": 0.55859375, "learning_rate": 0.0019904698991005778, "loss": 0.4011, "step": 5406 }, { "epoch": 0.009588830997740206, "grad_norm": 0.33984375, "learning_rate": 0.0019904612585643897, "loss": 0.289, "step": 5408 }, { "epoch": 0.00959237716305002, "grad_norm": 0.76171875, "learning_rate": 0.001990452614133842, "loss": 0.3158, "step": 5410 }, { "epoch": 0.009595923328359837, "grad_norm": 0.330078125, "learning_rate": 0.0019904439658089716, "loss": 0.5208, "step": 5412 }, { "epoch": 0.009599469493669651, "grad_norm": 0.50390625, "learning_rate": 0.0019904353135898165, "loss": 0.313, "step": 5414 }, { "epoch": 0.009603015658979468, "grad_norm": 0.515625, "learning_rate": 0.0019904266574764145, "loss": 0.2822, "step": 5416 }, { "epoch": 0.009606561824289282, "grad_norm": 2.859375, "learning_rate": 0.0019904179974688033, "loss": 0.4619, "step": 5418 }, { "epoch": 0.009610107989599097, "grad_norm": 0.48828125, "learning_rate": 0.0019904093335670215, "loss": 0.2557, "step": 5420 }, { "epoch": 0.009613654154908913, "grad_norm": 0.76953125, "learning_rate": 0.0019904006657711065, "loss": 0.3193, "step": 5422 }, { "epoch": 0.009617200320218728, "grad_norm": 0.53125, "learning_rate": 0.001990391994081096, "loss": 0.2368, "step": 5424 }, { "epoch": 0.009620746485528542, "grad_norm": 0.26953125, "learning_rate": 0.0019903833184970283, "loss": 0.2528, "step": 5426 }, { "epoch": 0.009624292650838358, "grad_norm": 0.431640625, "learning_rate": 0.0019903746390189407, "loss": 0.2841, "step": 5428 }, { "epoch": 0.009627838816148173, "grad_norm": 0.28125, "learning_rate": 0.0019903659556468715, "loss": 0.27, "step": 5430 }, { "epoch": 0.009631384981457988, "grad_norm": 1.7109375, "learning_rate": 0.0019903572683808595, "loss": 0.2637, "step": 5432 }, { "epoch": 0.009634931146767804, "grad_norm": 0.578125, "learning_rate": 0.001990348577220942, "loss": 0.2954, "step": 5434 }, { "epoch": 0.009638477312077618, "grad_norm": 0.302734375, "learning_rate": 0.0019903398821671564, "loss": 0.2343, "step": 5436 }, { "epoch": 0.009642023477387433, "grad_norm": 0.6328125, "learning_rate": 0.0019903311832195417, "loss": 0.27, "step": 5438 }, { "epoch": 0.00964556964269725, "grad_norm": 0.578125, "learning_rate": 0.0019903224803781354, "loss": 0.2144, "step": 5440 }, { "epoch": 0.009649115808007064, "grad_norm": 0.51171875, "learning_rate": 0.0019903137736429757, "loss": 0.2683, "step": 5442 }, { "epoch": 0.009652661973316878, "grad_norm": 0.75, "learning_rate": 0.001990305063014101, "loss": 0.2296, "step": 5444 }, { "epoch": 0.009656208138626695, "grad_norm": 0.46875, "learning_rate": 0.001990296348491549, "loss": 0.2922, "step": 5446 }, { "epoch": 0.00965975430393651, "grad_norm": 0.486328125, "learning_rate": 0.001990287630075357, "loss": 0.2725, "step": 5448 }, { "epoch": 0.009663300469246324, "grad_norm": 0.431640625, "learning_rate": 0.0019902789077655652, "loss": 0.2249, "step": 5450 }, { "epoch": 0.00966684663455614, "grad_norm": 0.546875, "learning_rate": 0.0019902701815622103, "loss": 0.3001, "step": 5452 }, { "epoch": 0.009670392799865955, "grad_norm": 0.51953125, "learning_rate": 0.001990261451465331, "loss": 0.2404, "step": 5454 }, { "epoch": 0.009673938965175771, "grad_norm": 1.71875, "learning_rate": 0.001990252717474965, "loss": 0.2583, "step": 5456 }, { "epoch": 0.009677485130485585, "grad_norm": 0.408203125, "learning_rate": 0.001990243979591151, "loss": 0.2287, "step": 5458 }, { "epoch": 0.0096810312957954, "grad_norm": 0.296875, "learning_rate": 0.001990235237813926, "loss": 0.2383, "step": 5460 }, { "epoch": 0.009684577461105216, "grad_norm": 1.71875, "learning_rate": 0.00199022649214333, "loss": 0.4918, "step": 5462 }, { "epoch": 0.00968812362641503, "grad_norm": 0.71875, "learning_rate": 0.0019902177425794, "loss": 0.2526, "step": 5464 }, { "epoch": 0.009691669791724845, "grad_norm": 0.2333984375, "learning_rate": 0.0019902089891221755, "loss": 0.2841, "step": 5466 }, { "epoch": 0.009695215957034662, "grad_norm": 0.36328125, "learning_rate": 0.001990200231771693, "loss": 0.3146, "step": 5468 }, { "epoch": 0.009698762122344476, "grad_norm": 0.69140625, "learning_rate": 0.001990191470527992, "loss": 0.2704, "step": 5470 }, { "epoch": 0.00970230828765429, "grad_norm": 0.9921875, "learning_rate": 0.001990182705391111, "loss": 0.345, "step": 5472 }, { "epoch": 0.009705854452964107, "grad_norm": 1.265625, "learning_rate": 0.0019901739363610877, "loss": 0.6601, "step": 5474 }, { "epoch": 0.009709400618273922, "grad_norm": 1.0390625, "learning_rate": 0.0019901651634379606, "loss": 0.2783, "step": 5476 }, { "epoch": 0.009712946783583736, "grad_norm": 1.0859375, "learning_rate": 0.0019901563866217683, "loss": 0.2675, "step": 5478 }, { "epoch": 0.009716492948893552, "grad_norm": 0.30078125, "learning_rate": 0.001990147605912549, "loss": 0.251, "step": 5480 }, { "epoch": 0.009720039114203367, "grad_norm": 0.58984375, "learning_rate": 0.001990138821310341, "loss": 0.2504, "step": 5482 }, { "epoch": 0.009723585279513182, "grad_norm": 2.15625, "learning_rate": 0.001990130032815183, "loss": 0.4118, "step": 5484 }, { "epoch": 0.009727131444822998, "grad_norm": 0.3984375, "learning_rate": 0.001990121240427113, "loss": 0.2073, "step": 5486 }, { "epoch": 0.009730677610132812, "grad_norm": 0.96484375, "learning_rate": 0.0019901124441461704, "loss": 0.6021, "step": 5488 }, { "epoch": 0.009734223775442629, "grad_norm": 0.7578125, "learning_rate": 0.0019901036439723923, "loss": 0.2819, "step": 5490 }, { "epoch": 0.009737769940752443, "grad_norm": 0.28125, "learning_rate": 0.0019900948399058185, "loss": 0.2678, "step": 5492 }, { "epoch": 0.009741316106062258, "grad_norm": 0.408203125, "learning_rate": 0.0019900860319464865, "loss": 0.2186, "step": 5494 }, { "epoch": 0.009744862271372074, "grad_norm": 0.359375, "learning_rate": 0.001990077220094435, "loss": 0.3387, "step": 5496 }, { "epoch": 0.009748408436681889, "grad_norm": 0.76171875, "learning_rate": 0.0019900684043497037, "loss": 0.2981, "step": 5498 }, { "epoch": 0.009751954601991703, "grad_norm": 0.2734375, "learning_rate": 0.001990059584712329, "loss": 0.2693, "step": 5500 }, { "epoch": 0.00975550076730152, "grad_norm": 0.283203125, "learning_rate": 0.0019900507611823517, "loss": 0.2463, "step": 5502 }, { "epoch": 0.009759046932611334, "grad_norm": 0.416015625, "learning_rate": 0.0019900419337598087, "loss": 0.2784, "step": 5504 }, { "epoch": 0.009762593097921149, "grad_norm": 0.283203125, "learning_rate": 0.0019900331024447393, "loss": 0.2042, "step": 5506 }, { "epoch": 0.009766139263230965, "grad_norm": 0.52734375, "learning_rate": 0.001990024267237183, "loss": 0.2527, "step": 5508 }, { "epoch": 0.00976968542854078, "grad_norm": 0.67578125, "learning_rate": 0.0019900154281371762, "loss": 0.2652, "step": 5510 }, { "epoch": 0.009773231593850594, "grad_norm": 0.29296875, "learning_rate": 0.00199000658514476, "loss": 0.3096, "step": 5512 }, { "epoch": 0.00977677775916041, "grad_norm": 0.859375, "learning_rate": 0.0019899977382599712, "loss": 0.2882, "step": 5514 }, { "epoch": 0.009780323924470225, "grad_norm": 0.248046875, "learning_rate": 0.0019899888874828496, "loss": 0.3353, "step": 5516 }, { "epoch": 0.00978387008978004, "grad_norm": 0.2578125, "learning_rate": 0.0019899800328134335, "loss": 0.2215, "step": 5518 }, { "epoch": 0.009787416255089856, "grad_norm": 0.314453125, "learning_rate": 0.0019899711742517616, "loss": 0.241, "step": 5520 }, { "epoch": 0.00979096242039967, "grad_norm": 0.890625, "learning_rate": 0.001989962311797873, "loss": 0.3184, "step": 5522 }, { "epoch": 0.009794508585709487, "grad_norm": 0.59765625, "learning_rate": 0.001989953445451806, "loss": 0.272, "step": 5524 }, { "epoch": 0.009798054751019301, "grad_norm": 0.69921875, "learning_rate": 0.0019899445752136, "loss": 0.1761, "step": 5526 }, { "epoch": 0.009801600916329116, "grad_norm": 1.71875, "learning_rate": 0.001989935701083293, "loss": 0.4969, "step": 5528 }, { "epoch": 0.009805147081638932, "grad_norm": 1.2890625, "learning_rate": 0.001989926823060924, "loss": 0.434, "step": 5530 }, { "epoch": 0.009808693246948746, "grad_norm": 0.306640625, "learning_rate": 0.0019899179411465326, "loss": 0.3315, "step": 5532 }, { "epoch": 0.009812239412258561, "grad_norm": 0.39453125, "learning_rate": 0.0019899090553401563, "loss": 0.451, "step": 5534 }, { "epoch": 0.009815785577568377, "grad_norm": 0.58984375, "learning_rate": 0.001989900165641835, "loss": 0.4802, "step": 5536 }, { "epoch": 0.009819331742878192, "grad_norm": 0.2578125, "learning_rate": 0.001989891272051608, "loss": 0.2513, "step": 5538 }, { "epoch": 0.009822877908188006, "grad_norm": 0.224609375, "learning_rate": 0.0019898823745695127, "loss": 0.2198, "step": 5540 }, { "epoch": 0.009826424073497823, "grad_norm": 5.65625, "learning_rate": 0.0019898734731955887, "loss": 0.3903, "step": 5542 }, { "epoch": 0.009829970238807637, "grad_norm": 0.435546875, "learning_rate": 0.0019898645679298755, "loss": 0.2811, "step": 5544 }, { "epoch": 0.009833516404117452, "grad_norm": 0.515625, "learning_rate": 0.001989855658772411, "loss": 0.258, "step": 5546 }, { "epoch": 0.009837062569427268, "grad_norm": 0.50390625, "learning_rate": 0.0019898467457232353, "loss": 0.2887, "step": 5548 }, { "epoch": 0.009840608734737083, "grad_norm": 1.1015625, "learning_rate": 0.0019898378287823864, "loss": 0.3219, "step": 5550 }, { "epoch": 0.009844154900046897, "grad_norm": 1.1015625, "learning_rate": 0.001989828907949904, "loss": 0.6607, "step": 5552 }, { "epoch": 0.009847701065356713, "grad_norm": 0.322265625, "learning_rate": 0.001989819983225827, "loss": 0.27, "step": 5554 }, { "epoch": 0.009851247230666528, "grad_norm": 0.34765625, "learning_rate": 0.001989811054610194, "loss": 0.2863, "step": 5556 }, { "epoch": 0.009854793395976344, "grad_norm": 0.82421875, "learning_rate": 0.0019898021221030444, "loss": 0.3375, "step": 5558 }, { "epoch": 0.009858339561286159, "grad_norm": 1.046875, "learning_rate": 0.0019897931857044172, "loss": 0.2429, "step": 5560 }, { "epoch": 0.009861885726595973, "grad_norm": 1.0703125, "learning_rate": 0.0019897842454143513, "loss": 0.3023, "step": 5562 }, { "epoch": 0.00986543189190579, "grad_norm": 0.6953125, "learning_rate": 0.001989775301232886, "loss": 0.284, "step": 5564 }, { "epoch": 0.009868978057215604, "grad_norm": 0.51171875, "learning_rate": 0.0019897663531600607, "loss": 0.3629, "step": 5566 }, { "epoch": 0.009872524222525419, "grad_norm": 0.703125, "learning_rate": 0.0019897574011959137, "loss": 0.2543, "step": 5568 }, { "epoch": 0.009876070387835235, "grad_norm": 0.33984375, "learning_rate": 0.001989748445340485, "loss": 0.2965, "step": 5570 }, { "epoch": 0.00987961655314505, "grad_norm": 0.49609375, "learning_rate": 0.0019897394855938133, "loss": 0.2777, "step": 5572 }, { "epoch": 0.009883162718454864, "grad_norm": 0.5546875, "learning_rate": 0.001989730521955938, "loss": 0.2765, "step": 5574 }, { "epoch": 0.00988670888376468, "grad_norm": 0.462890625, "learning_rate": 0.001989721554426898, "loss": 0.2722, "step": 5576 }, { "epoch": 0.009890255049074495, "grad_norm": 0.365234375, "learning_rate": 0.001989712583006733, "loss": 0.2657, "step": 5578 }, { "epoch": 0.00989380121438431, "grad_norm": 0.6328125, "learning_rate": 0.001989703607695482, "loss": 0.2019, "step": 5580 }, { "epoch": 0.009897347379694126, "grad_norm": 0.43359375, "learning_rate": 0.001989694628493184, "loss": 0.3284, "step": 5582 }, { "epoch": 0.00990089354500394, "grad_norm": 0.419921875, "learning_rate": 0.0019896856453998782, "loss": 0.2827, "step": 5584 }, { "epoch": 0.009904439710313755, "grad_norm": 0.73046875, "learning_rate": 0.0019896766584156047, "loss": 0.3349, "step": 5586 }, { "epoch": 0.009907985875623571, "grad_norm": 0.453125, "learning_rate": 0.001989667667540402, "loss": 0.2096, "step": 5588 }, { "epoch": 0.009911532040933386, "grad_norm": 0.99609375, "learning_rate": 0.00198965867277431, "loss": 0.4079, "step": 5590 }, { "epoch": 0.009915078206243202, "grad_norm": 2.984375, "learning_rate": 0.0019896496741173674, "loss": 0.2277, "step": 5592 }, { "epoch": 0.009918624371553017, "grad_norm": 0.70703125, "learning_rate": 0.001989640671569614, "loss": 0.3198, "step": 5594 }, { "epoch": 0.009922170536862831, "grad_norm": 1.5625, "learning_rate": 0.0019896316651310895, "loss": 0.3016, "step": 5596 }, { "epoch": 0.009925716702172648, "grad_norm": 0.294921875, "learning_rate": 0.0019896226548018325, "loss": 0.2796, "step": 5598 }, { "epoch": 0.009929262867482462, "grad_norm": 0.58203125, "learning_rate": 0.0019896136405818826, "loss": 0.2614, "step": 5600 }, { "epoch": 0.009932809032792277, "grad_norm": 0.248046875, "learning_rate": 0.0019896046224712792, "loss": 0.2003, "step": 5602 }, { "epoch": 0.009936355198102093, "grad_norm": 3.09375, "learning_rate": 0.0019895956004700624, "loss": 0.3767, "step": 5604 }, { "epoch": 0.009939901363411907, "grad_norm": 0.416015625, "learning_rate": 0.001989586574578271, "loss": 0.2424, "step": 5606 }, { "epoch": 0.009943447528721722, "grad_norm": 3.9375, "learning_rate": 0.0019895775447959447, "loss": 0.2097, "step": 5608 }, { "epoch": 0.009946993694031538, "grad_norm": 0.546875, "learning_rate": 0.001989568511123123, "loss": 0.3036, "step": 5610 }, { "epoch": 0.009950539859341353, "grad_norm": 1.8125, "learning_rate": 0.001989559473559845, "loss": 0.2509, "step": 5612 }, { "epoch": 0.009954086024651167, "grad_norm": 0.64453125, "learning_rate": 0.0019895504321061513, "loss": 0.224, "step": 5614 }, { "epoch": 0.009957632189960984, "grad_norm": 2.53125, "learning_rate": 0.0019895413867620803, "loss": 0.371, "step": 5616 }, { "epoch": 0.009961178355270798, "grad_norm": 0.5, "learning_rate": 0.0019895323375276716, "loss": 0.2906, "step": 5618 }, { "epoch": 0.009964724520580613, "grad_norm": 0.30078125, "learning_rate": 0.001989523284402966, "loss": 0.2248, "step": 5620 }, { "epoch": 0.009968270685890429, "grad_norm": 0.40625, "learning_rate": 0.0019895142273880016, "loss": 0.2878, "step": 5622 }, { "epoch": 0.009971816851200244, "grad_norm": 0.482421875, "learning_rate": 0.0019895051664828188, "loss": 0.2276, "step": 5624 }, { "epoch": 0.00997536301651006, "grad_norm": 0.8125, "learning_rate": 0.0019894961016874574, "loss": 0.3383, "step": 5626 }, { "epoch": 0.009978909181819874, "grad_norm": 0.90234375, "learning_rate": 0.0019894870330019565, "loss": 0.2168, "step": 5628 }, { "epoch": 0.009982455347129689, "grad_norm": 0.6484375, "learning_rate": 0.0019894779604263555, "loss": 0.23, "step": 5630 }, { "epoch": 0.009986001512439505, "grad_norm": 0.359375, "learning_rate": 0.0019894688839606953, "loss": 0.2882, "step": 5632 }, { "epoch": 0.00998954767774932, "grad_norm": 0.55078125, "learning_rate": 0.0019894598036050144, "loss": 0.3022, "step": 5634 }, { "epoch": 0.009993093843059134, "grad_norm": 0.388671875, "learning_rate": 0.0019894507193593536, "loss": 0.2219, "step": 5636 }, { "epoch": 0.00999664000836895, "grad_norm": 0.6875, "learning_rate": 0.0019894416312237514, "loss": 0.2551, "step": 5638 }, { "epoch": 0.010000186173678765, "grad_norm": 0.296875, "learning_rate": 0.001989432539198248, "loss": 0.5288, "step": 5640 }, { "epoch": 0.01000373233898858, "grad_norm": 1.6953125, "learning_rate": 0.001989423443282884, "loss": 0.4341, "step": 5642 }, { "epoch": 0.010007278504298396, "grad_norm": 0.875, "learning_rate": 0.001989414343477698, "loss": 0.275, "step": 5644 }, { "epoch": 0.01001082466960821, "grad_norm": 0.365234375, "learning_rate": 0.00198940523978273, "loss": 0.2078, "step": 5646 }, { "epoch": 0.010014370834918025, "grad_norm": 0.458984375, "learning_rate": 0.001989396132198021, "loss": 0.4167, "step": 5648 }, { "epoch": 0.010017917000227841, "grad_norm": 0.2265625, "learning_rate": 0.0019893870207236095, "loss": 0.2656, "step": 5650 }, { "epoch": 0.010021463165537656, "grad_norm": 2.546875, "learning_rate": 0.0019893779053595357, "loss": 0.3801, "step": 5652 }, { "epoch": 0.01002500933084747, "grad_norm": 0.408203125, "learning_rate": 0.0019893687861058398, "loss": 0.2986, "step": 5654 }, { "epoch": 0.010028555496157287, "grad_norm": 0.419921875, "learning_rate": 0.001989359662962561, "loss": 0.2402, "step": 5656 }, { "epoch": 0.010032101661467101, "grad_norm": 0.4140625, "learning_rate": 0.00198935053592974, "loss": 0.2696, "step": 5658 }, { "epoch": 0.010035647826776918, "grad_norm": 1.1484375, "learning_rate": 0.001989341405007416, "loss": 0.1741, "step": 5660 }, { "epoch": 0.010039193992086732, "grad_norm": 0.59765625, "learning_rate": 0.00198933227019563, "loss": 0.2502, "step": 5662 }, { "epoch": 0.010042740157396547, "grad_norm": 0.3671875, "learning_rate": 0.0019893231314944207, "loss": 0.3084, "step": 5664 }, { "epoch": 0.010046286322706363, "grad_norm": 0.78515625, "learning_rate": 0.0019893139889038285, "loss": 0.4311, "step": 5666 }, { "epoch": 0.010049832488016178, "grad_norm": 0.4609375, "learning_rate": 0.0019893048424238936, "loss": 0.2815, "step": 5668 }, { "epoch": 0.010053378653325992, "grad_norm": 1.609375, "learning_rate": 0.0019892956920546565, "loss": 0.3271, "step": 5670 }, { "epoch": 0.010056924818635809, "grad_norm": 1.2734375, "learning_rate": 0.001989286537796156, "loss": 0.2578, "step": 5672 }, { "epoch": 0.010060470983945623, "grad_norm": 0.52734375, "learning_rate": 0.0019892773796484323, "loss": 0.2424, "step": 5674 }, { "epoch": 0.010064017149255438, "grad_norm": 0.3828125, "learning_rate": 0.0019892682176115267, "loss": 0.2775, "step": 5676 }, { "epoch": 0.010067563314565254, "grad_norm": 0.3984375, "learning_rate": 0.001989259051685478, "loss": 0.3256, "step": 5678 }, { "epoch": 0.010071109479875068, "grad_norm": 0.578125, "learning_rate": 0.001989249881870327, "loss": 0.3471, "step": 5680 }, { "epoch": 0.010074655645184883, "grad_norm": 0.51171875, "learning_rate": 0.0019892407081661136, "loss": 0.2242, "step": 5682 }, { "epoch": 0.0100782018104947, "grad_norm": 0.42578125, "learning_rate": 0.0019892315305728775, "loss": 0.2751, "step": 5684 }, { "epoch": 0.010081747975804514, "grad_norm": 0.51171875, "learning_rate": 0.0019892223490906597, "loss": 0.3133, "step": 5686 }, { "epoch": 0.010085294141114328, "grad_norm": 1.7265625, "learning_rate": 0.0019892131637195, "loss": 0.2903, "step": 5688 }, { "epoch": 0.010088840306424145, "grad_norm": 0.2314453125, "learning_rate": 0.0019892039744594378, "loss": 0.2409, "step": 5690 }, { "epoch": 0.01009238647173396, "grad_norm": 0.435546875, "learning_rate": 0.001989194781310514, "loss": 0.2348, "step": 5692 }, { "epoch": 0.010095932637043776, "grad_norm": 0.490234375, "learning_rate": 0.0019891855842727687, "loss": 0.2984, "step": 5694 }, { "epoch": 0.01009947880235359, "grad_norm": 1.25, "learning_rate": 0.001989176383346243, "loss": 0.3989, "step": 5696 }, { "epoch": 0.010103024967663405, "grad_norm": 0.640625, "learning_rate": 0.0019891671785309756, "loss": 0.289, "step": 5698 }, { "epoch": 0.010106571132973221, "grad_norm": 1.203125, "learning_rate": 0.0019891579698270074, "loss": 0.4843, "step": 5700 }, { "epoch": 0.010110117298283035, "grad_norm": 1.0390625, "learning_rate": 0.0019891487572343785, "loss": 0.4377, "step": 5702 }, { "epoch": 0.01011366346359285, "grad_norm": 0.380859375, "learning_rate": 0.00198913954075313, "loss": 0.2682, "step": 5704 }, { "epoch": 0.010117209628902666, "grad_norm": 0.69921875, "learning_rate": 0.0019891303203833015, "loss": 0.2555, "step": 5706 }, { "epoch": 0.010120755794212481, "grad_norm": 0.494140625, "learning_rate": 0.001989121096124933, "loss": 0.2466, "step": 5708 }, { "epoch": 0.010124301959522295, "grad_norm": 1.1875, "learning_rate": 0.0019891118679780657, "loss": 0.4772, "step": 5710 }, { "epoch": 0.010127848124832112, "grad_norm": 0.2333984375, "learning_rate": 0.0019891026359427394, "loss": 0.2197, "step": 5712 }, { "epoch": 0.010131394290141926, "grad_norm": 2.859375, "learning_rate": 0.001989093400018995, "loss": 0.3882, "step": 5714 }, { "epoch": 0.01013494045545174, "grad_norm": 0.40234375, "learning_rate": 0.0019890841602068724, "loss": 0.2922, "step": 5716 }, { "epoch": 0.010138486620761557, "grad_norm": 0.66015625, "learning_rate": 0.0019890749165064115, "loss": 0.311, "step": 5718 }, { "epoch": 0.010142032786071372, "grad_norm": 0.2470703125, "learning_rate": 0.0019890656689176543, "loss": 0.2461, "step": 5720 }, { "epoch": 0.010145578951381186, "grad_norm": 0.953125, "learning_rate": 0.0019890564174406397, "loss": 0.2785, "step": 5722 }, { "epoch": 0.010149125116691003, "grad_norm": 0.7578125, "learning_rate": 0.0019890471620754085, "loss": 0.2713, "step": 5724 }, { "epoch": 0.010152671282000817, "grad_norm": 0.85546875, "learning_rate": 0.001989037902822002, "loss": 0.3803, "step": 5726 }, { "epoch": 0.010156217447310633, "grad_norm": 0.4375, "learning_rate": 0.00198902863968046, "loss": 0.2137, "step": 5728 }, { "epoch": 0.010159763612620448, "grad_norm": 0.91015625, "learning_rate": 0.0019890193726508224, "loss": 0.3113, "step": 5730 }, { "epoch": 0.010163309777930262, "grad_norm": 0.275390625, "learning_rate": 0.0019890101017331313, "loss": 0.2358, "step": 5732 }, { "epoch": 0.010166855943240079, "grad_norm": 0.298828125, "learning_rate": 0.0019890008269274266, "loss": 0.2295, "step": 5734 }, { "epoch": 0.010170402108549893, "grad_norm": 0.74609375, "learning_rate": 0.001988991548233748, "loss": 0.2922, "step": 5736 }, { "epoch": 0.010173948273859708, "grad_norm": 0.68359375, "learning_rate": 0.0019889822656521373, "loss": 0.2203, "step": 5738 }, { "epoch": 0.010177494439169524, "grad_norm": 0.57421875, "learning_rate": 0.001988972979182634, "loss": 0.2339, "step": 5740 }, { "epoch": 0.010181040604479339, "grad_norm": 1.453125, "learning_rate": 0.0019889636888252796, "loss": 0.2815, "step": 5742 }, { "epoch": 0.010184586769789153, "grad_norm": 0.33984375, "learning_rate": 0.0019889543945801145, "loss": 0.321, "step": 5744 }, { "epoch": 0.01018813293509897, "grad_norm": 3.9375, "learning_rate": 0.001988945096447179, "loss": 0.2809, "step": 5746 }, { "epoch": 0.010191679100408784, "grad_norm": 2.734375, "learning_rate": 0.0019889357944265144, "loss": 0.4626, "step": 5748 }, { "epoch": 0.010195225265718599, "grad_norm": 0.73828125, "learning_rate": 0.0019889264885181606, "loss": 0.2143, "step": 5750 }, { "epoch": 0.010198771431028415, "grad_norm": 0.44140625, "learning_rate": 0.001988917178722159, "loss": 0.1887, "step": 5752 }, { "epoch": 0.01020231759633823, "grad_norm": 0.34375, "learning_rate": 0.0019889078650385497, "loss": 0.3456, "step": 5754 }, { "epoch": 0.010205863761648044, "grad_norm": 0.408203125, "learning_rate": 0.0019888985474673737, "loss": 0.288, "step": 5756 }, { "epoch": 0.01020940992695786, "grad_norm": 0.640625, "learning_rate": 0.001988889226008672, "loss": 0.2324, "step": 5758 }, { "epoch": 0.010212956092267675, "grad_norm": 0.9453125, "learning_rate": 0.001988879900662485, "loss": 0.2174, "step": 5760 }, { "epoch": 0.010216502257577491, "grad_norm": 0.2490234375, "learning_rate": 0.0019888705714288537, "loss": 0.2459, "step": 5762 }, { "epoch": 0.010220048422887306, "grad_norm": 0.380859375, "learning_rate": 0.001988861238307819, "loss": 0.2782, "step": 5764 }, { "epoch": 0.01022359458819712, "grad_norm": 0.5, "learning_rate": 0.001988851901299421, "loss": 0.2213, "step": 5766 }, { "epoch": 0.010227140753506937, "grad_norm": 0.75, "learning_rate": 0.001988842560403702, "loss": 0.2552, "step": 5768 }, { "epoch": 0.010230686918816751, "grad_norm": 0.3984375, "learning_rate": 0.0019888332156207016, "loss": 0.2348, "step": 5770 }, { "epoch": 0.010234233084126566, "grad_norm": 0.361328125, "learning_rate": 0.0019888238669504604, "loss": 0.2418, "step": 5772 }, { "epoch": 0.010237779249436382, "grad_norm": 0.46875, "learning_rate": 0.0019888145143930206, "loss": 0.2455, "step": 5774 }, { "epoch": 0.010241325414746196, "grad_norm": 0.384765625, "learning_rate": 0.001988805157948422, "loss": 0.2102, "step": 5776 }, { "epoch": 0.010244871580056011, "grad_norm": 0.45703125, "learning_rate": 0.001988795797616706, "loss": 0.2653, "step": 5778 }, { "epoch": 0.010248417745365827, "grad_norm": 0.3671875, "learning_rate": 0.0019887864333979137, "loss": 0.2891, "step": 5780 }, { "epoch": 0.010251963910675642, "grad_norm": 0.416015625, "learning_rate": 0.0019887770652920857, "loss": 0.2714, "step": 5782 }, { "epoch": 0.010255510075985456, "grad_norm": 0.302734375, "learning_rate": 0.001988767693299263, "loss": 0.2319, "step": 5784 }, { "epoch": 0.010259056241295273, "grad_norm": 1.359375, "learning_rate": 0.0019887583174194867, "loss": 0.2893, "step": 5786 }, { "epoch": 0.010262602406605087, "grad_norm": 0.2890625, "learning_rate": 0.0019887489376527977, "loss": 0.5093, "step": 5788 }, { "epoch": 0.010266148571914902, "grad_norm": 0.22265625, "learning_rate": 0.001988739553999237, "loss": 0.2232, "step": 5790 }, { "epoch": 0.010269694737224718, "grad_norm": 0.62109375, "learning_rate": 0.001988730166458846, "loss": 0.2264, "step": 5792 }, { "epoch": 0.010273240902534533, "grad_norm": 1.2578125, "learning_rate": 0.0019887207750316654, "loss": 0.374, "step": 5794 }, { "epoch": 0.010276787067844349, "grad_norm": 1.2421875, "learning_rate": 0.001988711379717736, "loss": 0.4457, "step": 5796 }, { "epoch": 0.010280333233154164, "grad_norm": 0.78125, "learning_rate": 0.0019887019805170996, "loss": 0.2772, "step": 5798 }, { "epoch": 0.010283879398463978, "grad_norm": 0.53125, "learning_rate": 0.001988692577429797, "loss": 0.3339, "step": 5800 }, { "epoch": 0.010287425563773794, "grad_norm": 2.703125, "learning_rate": 0.0019886831704558692, "loss": 0.2457, "step": 5802 }, { "epoch": 0.010290971729083609, "grad_norm": 0.546875, "learning_rate": 0.001988673759595358, "loss": 0.2628, "step": 5804 }, { "epoch": 0.010294517894393423, "grad_norm": 0.388671875, "learning_rate": 0.001988664344848303, "loss": 0.256, "step": 5806 }, { "epoch": 0.01029806405970324, "grad_norm": 1.0546875, "learning_rate": 0.0019886549262147467, "loss": 0.3069, "step": 5808 }, { "epoch": 0.010301610225013054, "grad_norm": 0.515625, "learning_rate": 0.0019886455036947303, "loss": 0.3723, "step": 5810 }, { "epoch": 0.010305156390322869, "grad_norm": 0.81640625, "learning_rate": 0.001988636077288294, "loss": 0.3231, "step": 5812 }, { "epoch": 0.010308702555632685, "grad_norm": 0.42578125, "learning_rate": 0.0019886266469954805, "loss": 0.2946, "step": 5814 }, { "epoch": 0.0103122487209425, "grad_norm": 0.486328125, "learning_rate": 0.0019886172128163295, "loss": 0.2659, "step": 5816 }, { "epoch": 0.010315794886252314, "grad_norm": 0.51953125, "learning_rate": 0.001988607774750883, "loss": 0.2917, "step": 5818 }, { "epoch": 0.01031934105156213, "grad_norm": 0.197265625, "learning_rate": 0.0019885983327991826, "loss": 0.2409, "step": 5820 }, { "epoch": 0.010322887216871945, "grad_norm": 0.5234375, "learning_rate": 0.001988588886961269, "loss": 0.2564, "step": 5822 }, { "epoch": 0.01032643338218176, "grad_norm": 0.3515625, "learning_rate": 0.0019885794372371843, "loss": 0.2172, "step": 5824 }, { "epoch": 0.010329979547491576, "grad_norm": 0.88671875, "learning_rate": 0.001988569983626969, "loss": 0.557, "step": 5826 }, { "epoch": 0.01033352571280139, "grad_norm": 0.87109375, "learning_rate": 0.0019885605261306645, "loss": 0.3127, "step": 5828 }, { "epoch": 0.010337071878111207, "grad_norm": 0.515625, "learning_rate": 0.0019885510647483125, "loss": 0.2379, "step": 5830 }, { "epoch": 0.010340618043421021, "grad_norm": 3.0625, "learning_rate": 0.0019885415994799543, "loss": 0.304, "step": 5832 }, { "epoch": 0.010344164208730836, "grad_norm": 1.171875, "learning_rate": 0.0019885321303256312, "loss": 0.344, "step": 5834 }, { "epoch": 0.010347710374040652, "grad_norm": 1.125, "learning_rate": 0.001988522657285385, "loss": 0.5607, "step": 5836 }, { "epoch": 0.010351256539350467, "grad_norm": 1.4140625, "learning_rate": 0.0019885131803592565, "loss": 0.37, "step": 5838 }, { "epoch": 0.010354802704660281, "grad_norm": 0.33203125, "learning_rate": 0.0019885036995472877, "loss": 0.2129, "step": 5840 }, { "epoch": 0.010358348869970098, "grad_norm": 0.3125, "learning_rate": 0.0019884942148495196, "loss": 0.2877, "step": 5842 }, { "epoch": 0.010361895035279912, "grad_norm": 1.25, "learning_rate": 0.001988484726265994, "loss": 0.3503, "step": 5844 }, { "epoch": 0.010365441200589727, "grad_norm": 0.7265625, "learning_rate": 0.001988475233796752, "loss": 0.3453, "step": 5846 }, { "epoch": 0.010368987365899543, "grad_norm": 0.66796875, "learning_rate": 0.001988465737441836, "loss": 0.3034, "step": 5848 }, { "epoch": 0.010372533531209358, "grad_norm": 0.48828125, "learning_rate": 0.001988456237201287, "loss": 0.2343, "step": 5850 }, { "epoch": 0.010376079696519172, "grad_norm": 0.64453125, "learning_rate": 0.0019884467330751458, "loss": 0.3782, "step": 5852 }, { "epoch": 0.010379625861828988, "grad_norm": 0.326171875, "learning_rate": 0.0019884372250634544, "loss": 0.2747, "step": 5854 }, { "epoch": 0.010383172027138803, "grad_norm": 0.310546875, "learning_rate": 0.0019884277131662553, "loss": 0.2435, "step": 5856 }, { "epoch": 0.010386718192448617, "grad_norm": 0.82421875, "learning_rate": 0.0019884181973835896, "loss": 0.414, "step": 5858 }, { "epoch": 0.010390264357758434, "grad_norm": 0.326171875, "learning_rate": 0.0019884086777154984, "loss": 0.2359, "step": 5860 }, { "epoch": 0.010393810523068248, "grad_norm": 0.41796875, "learning_rate": 0.001988399154162024, "loss": 0.2979, "step": 5862 }, { "epoch": 0.010397356688378065, "grad_norm": 0.62890625, "learning_rate": 0.0019883896267232077, "loss": 0.2331, "step": 5864 }, { "epoch": 0.010400902853687879, "grad_norm": 1.1640625, "learning_rate": 0.001988380095399091, "loss": 0.3314, "step": 5866 }, { "epoch": 0.010404449018997694, "grad_norm": 0.5546875, "learning_rate": 0.001988370560189716, "loss": 0.2767, "step": 5868 }, { "epoch": 0.01040799518430751, "grad_norm": 0.357421875, "learning_rate": 0.0019883610210951236, "loss": 0.2005, "step": 5870 }, { "epoch": 0.010411541349617325, "grad_norm": 0.3203125, "learning_rate": 0.001988351478115357, "loss": 0.2644, "step": 5872 }, { "epoch": 0.010415087514927139, "grad_norm": 1.1796875, "learning_rate": 0.0019883419312504563, "loss": 0.3104, "step": 5874 }, { "epoch": 0.010418633680236955, "grad_norm": 0.375, "learning_rate": 0.0019883323805004647, "loss": 0.2735, "step": 5876 }, { "epoch": 0.01042217984554677, "grad_norm": 0.25, "learning_rate": 0.0019883228258654228, "loss": 0.2543, "step": 5878 }, { "epoch": 0.010425726010856584, "grad_norm": 0.578125, "learning_rate": 0.0019883132673453726, "loss": 0.2897, "step": 5880 }, { "epoch": 0.0104292721761664, "grad_norm": 0.65625, "learning_rate": 0.001988303704940357, "loss": 0.2792, "step": 5882 }, { "epoch": 0.010432818341476215, "grad_norm": 0.80078125, "learning_rate": 0.0019882941386504165, "loss": 0.3873, "step": 5884 }, { "epoch": 0.01043636450678603, "grad_norm": 0.66015625, "learning_rate": 0.0019882845684755933, "loss": 0.2324, "step": 5886 }, { "epoch": 0.010439910672095846, "grad_norm": 1.203125, "learning_rate": 0.0019882749944159293, "loss": 0.3238, "step": 5888 }, { "epoch": 0.01044345683740566, "grad_norm": 0.59375, "learning_rate": 0.001988265416471467, "loss": 0.2852, "step": 5890 }, { "epoch": 0.010447003002715475, "grad_norm": 5.4375, "learning_rate": 0.001988255834642247, "loss": 0.3307, "step": 5892 }, { "epoch": 0.010450549168025292, "grad_norm": 2.125, "learning_rate": 0.001988246248928313, "loss": 0.2993, "step": 5894 }, { "epoch": 0.010454095333335106, "grad_norm": 0.53515625, "learning_rate": 0.001988236659329705, "loss": 0.2431, "step": 5896 }, { "epoch": 0.010457641498644922, "grad_norm": 0.8046875, "learning_rate": 0.001988227065846466, "loss": 0.3667, "step": 5898 }, { "epoch": 0.010461187663954737, "grad_norm": 0.69921875, "learning_rate": 0.0019882174684786374, "loss": 0.3712, "step": 5900 }, { "epoch": 0.010464733829264551, "grad_norm": 3.578125, "learning_rate": 0.001988207867226262, "loss": 0.277, "step": 5902 }, { "epoch": 0.010468279994574368, "grad_norm": 0.5859375, "learning_rate": 0.001988198262089381, "loss": 0.3025, "step": 5904 }, { "epoch": 0.010471826159884182, "grad_norm": 0.796875, "learning_rate": 0.0019881886530680373, "loss": 0.2458, "step": 5906 }, { "epoch": 0.010475372325193997, "grad_norm": 0.326171875, "learning_rate": 0.001988179040162272, "loss": 0.3324, "step": 5908 }, { "epoch": 0.010478918490503813, "grad_norm": 0.80859375, "learning_rate": 0.0019881694233721274, "loss": 0.2186, "step": 5910 }, { "epoch": 0.010482464655813628, "grad_norm": 0.38671875, "learning_rate": 0.0019881598026976455, "loss": 0.1934, "step": 5912 }, { "epoch": 0.010486010821123442, "grad_norm": 0.3125, "learning_rate": 0.001988150178138869, "loss": 0.2527, "step": 5914 }, { "epoch": 0.010489556986433259, "grad_norm": 1.0546875, "learning_rate": 0.001988140549695839, "loss": 0.4183, "step": 5916 }, { "epoch": 0.010493103151743073, "grad_norm": 0.546875, "learning_rate": 0.0019881309173685985, "loss": 0.2469, "step": 5918 }, { "epoch": 0.010496649317052888, "grad_norm": 0.443359375, "learning_rate": 0.0019881212811571894, "loss": 0.3144, "step": 5920 }, { "epoch": 0.010500195482362704, "grad_norm": 0.515625, "learning_rate": 0.0019881116410616533, "loss": 0.2961, "step": 5922 }, { "epoch": 0.010503741647672519, "grad_norm": 0.453125, "learning_rate": 0.0019881019970820328, "loss": 0.2274, "step": 5924 }, { "epoch": 0.010507287812982333, "grad_norm": 1.1796875, "learning_rate": 0.0019880923492183703, "loss": 0.2423, "step": 5926 }, { "epoch": 0.01051083397829215, "grad_norm": 0.66796875, "learning_rate": 0.0019880826974707074, "loss": 0.3151, "step": 5928 }, { "epoch": 0.010514380143601964, "grad_norm": 0.55078125, "learning_rate": 0.001988073041839087, "loss": 0.2301, "step": 5930 }, { "epoch": 0.01051792630891178, "grad_norm": 0.328125, "learning_rate": 0.001988063382323551, "loss": 0.2753, "step": 5932 }, { "epoch": 0.010521472474221595, "grad_norm": 2.671875, "learning_rate": 0.001988053718924141, "loss": 0.4232, "step": 5934 }, { "epoch": 0.01052501863953141, "grad_norm": 3.234375, "learning_rate": 0.0019880440516409003, "loss": 0.3246, "step": 5936 }, { "epoch": 0.010528564804841226, "grad_norm": 0.361328125, "learning_rate": 0.001988034380473871, "loss": 0.2806, "step": 5938 }, { "epoch": 0.01053211097015104, "grad_norm": 0.79296875, "learning_rate": 0.0019880247054230946, "loss": 0.2824, "step": 5940 }, { "epoch": 0.010535657135460855, "grad_norm": 0.453125, "learning_rate": 0.0019880150264886143, "loss": 0.2641, "step": 5942 }, { "epoch": 0.010539203300770671, "grad_norm": 0.390625, "learning_rate": 0.0019880053436704724, "loss": 0.2486, "step": 5944 }, { "epoch": 0.010542749466080486, "grad_norm": 0.5546875, "learning_rate": 0.00198799565696871, "loss": 0.2487, "step": 5946 }, { "epoch": 0.0105462956313903, "grad_norm": 0.640625, "learning_rate": 0.0019879859663833716, "loss": 0.5694, "step": 5948 }, { "epoch": 0.010549841796700116, "grad_norm": 0.74609375, "learning_rate": 0.0019879762719144977, "loss": 0.3262, "step": 5950 }, { "epoch": 0.010553387962009931, "grad_norm": 0.322265625, "learning_rate": 0.0019879665735621312, "loss": 0.2522, "step": 5952 }, { "epoch": 0.010556934127319745, "grad_norm": 0.421875, "learning_rate": 0.0019879568713263153, "loss": 0.2822, "step": 5954 }, { "epoch": 0.010560480292629562, "grad_norm": 0.61328125, "learning_rate": 0.0019879471652070914, "loss": 0.2267, "step": 5956 }, { "epoch": 0.010564026457939376, "grad_norm": 1.578125, "learning_rate": 0.0019879374552045025, "loss": 0.339, "step": 5958 }, { "epoch": 0.010567572623249191, "grad_norm": 0.7890625, "learning_rate": 0.0019879277413185907, "loss": 0.2751, "step": 5960 }, { "epoch": 0.010571118788559007, "grad_norm": 0.96484375, "learning_rate": 0.0019879180235493994, "loss": 0.4019, "step": 5962 }, { "epoch": 0.010574664953868822, "grad_norm": 0.54296875, "learning_rate": 0.0019879083018969697, "loss": 0.2823, "step": 5964 }, { "epoch": 0.010578211119178638, "grad_norm": 0.8828125, "learning_rate": 0.001987898576361345, "loss": 0.2677, "step": 5966 }, { "epoch": 0.010581757284488453, "grad_norm": 0.35546875, "learning_rate": 0.001987888846942568, "loss": 0.246, "step": 5968 }, { "epoch": 0.010585303449798267, "grad_norm": 0.306640625, "learning_rate": 0.0019878791136406808, "loss": 0.2474, "step": 5970 }, { "epoch": 0.010588849615108083, "grad_norm": 0.2890625, "learning_rate": 0.0019878693764557257, "loss": 0.2399, "step": 5972 }, { "epoch": 0.010592395780417898, "grad_norm": 0.51171875, "learning_rate": 0.001987859635387746, "loss": 0.2762, "step": 5974 }, { "epoch": 0.010595941945727713, "grad_norm": 0.54296875, "learning_rate": 0.0019878498904367845, "loss": 0.2992, "step": 5976 }, { "epoch": 0.010599488111037529, "grad_norm": 1.140625, "learning_rate": 0.0019878401416028825, "loss": 0.2313, "step": 5978 }, { "epoch": 0.010603034276347343, "grad_norm": 1.2109375, "learning_rate": 0.001987830388886084, "loss": 0.5073, "step": 5980 }, { "epoch": 0.010606580441657158, "grad_norm": 0.400390625, "learning_rate": 0.0019878206322864306, "loss": 0.2226, "step": 5982 }, { "epoch": 0.010610126606966974, "grad_norm": 0.87109375, "learning_rate": 0.001987810871803966, "loss": 0.3639, "step": 5984 }, { "epoch": 0.010613672772276789, "grad_norm": 0.51953125, "learning_rate": 0.001987801107438732, "loss": 0.2666, "step": 5986 }, { "epoch": 0.010617218937586603, "grad_norm": 0.32421875, "learning_rate": 0.0019877913391907714, "loss": 0.2591, "step": 5988 }, { "epoch": 0.01062076510289642, "grad_norm": 4.6875, "learning_rate": 0.0019877815670601277, "loss": 0.2809, "step": 5990 }, { "epoch": 0.010624311268206234, "grad_norm": 0.494140625, "learning_rate": 0.0019877717910468428, "loss": 0.2693, "step": 5992 }, { "epoch": 0.010627857433516049, "grad_norm": 0.75390625, "learning_rate": 0.00198776201115096, "loss": 0.2601, "step": 5994 }, { "epoch": 0.010631403598825865, "grad_norm": 0.29296875, "learning_rate": 0.0019877522273725216, "loss": 0.2682, "step": 5996 }, { "epoch": 0.01063494976413568, "grad_norm": 1.1015625, "learning_rate": 0.0019877424397115708, "loss": 0.2899, "step": 5998 }, { "epoch": 0.010638495929445496, "grad_norm": 0.451171875, "learning_rate": 0.00198773264816815, "loss": 0.2456, "step": 6000 }, { "epoch": 0.01064204209475531, "grad_norm": 0.498046875, "learning_rate": 0.0019877228527423025, "loss": 0.2319, "step": 6002 }, { "epoch": 0.010645588260065125, "grad_norm": 0.5390625, "learning_rate": 0.0019877130534340704, "loss": 0.2378, "step": 6004 }, { "epoch": 0.010649134425374941, "grad_norm": 0.275390625, "learning_rate": 0.0019877032502434978, "loss": 0.2863, "step": 6006 }, { "epoch": 0.010652680590684756, "grad_norm": 0.390625, "learning_rate": 0.0019876934431706265, "loss": 0.2035, "step": 6008 }, { "epoch": 0.01065622675599457, "grad_norm": 0.51953125, "learning_rate": 0.0019876836322154996, "loss": 0.2592, "step": 6010 }, { "epoch": 0.010659772921304387, "grad_norm": 1.296875, "learning_rate": 0.0019876738173781605, "loss": 0.2982, "step": 6012 }, { "epoch": 0.010663319086614201, "grad_norm": 0.228515625, "learning_rate": 0.001987663998658651, "loss": 0.23, "step": 6014 }, { "epoch": 0.010666865251924016, "grad_norm": 0.2412109375, "learning_rate": 0.0019876541760570155, "loss": 0.2322, "step": 6016 }, { "epoch": 0.010670411417233832, "grad_norm": 0.267578125, "learning_rate": 0.001987644349573296, "loss": 0.2854, "step": 6018 }, { "epoch": 0.010673957582543647, "grad_norm": 0.6171875, "learning_rate": 0.0019876345192075357, "loss": 0.3641, "step": 6020 }, { "epoch": 0.010677503747853461, "grad_norm": 0.55078125, "learning_rate": 0.0019876246849597776, "loss": 0.227, "step": 6022 }, { "epoch": 0.010681049913163277, "grad_norm": 0.34765625, "learning_rate": 0.001987614846830065, "loss": 0.2549, "step": 6024 }, { "epoch": 0.010684596078473092, "grad_norm": 0.46875, "learning_rate": 0.00198760500481844, "loss": 0.2421, "step": 6026 }, { "epoch": 0.010688142243782906, "grad_norm": 0.625, "learning_rate": 0.001987595158924947, "loss": 0.2925, "step": 6028 }, { "epoch": 0.010691688409092723, "grad_norm": 2.703125, "learning_rate": 0.001987585309149628, "loss": 0.205, "step": 6030 }, { "epoch": 0.010695234574402537, "grad_norm": 0.384765625, "learning_rate": 0.001987575455492526, "loss": 0.4531, "step": 6032 }, { "epoch": 0.010698780739712354, "grad_norm": 0.2412109375, "learning_rate": 0.001987565597953685, "loss": 0.2631, "step": 6034 }, { "epoch": 0.010702326905022168, "grad_norm": 5.78125, "learning_rate": 0.0019875557365331476, "loss": 0.2783, "step": 6036 }, { "epoch": 0.010705873070331983, "grad_norm": 0.859375, "learning_rate": 0.001987545871230957, "loss": 0.254, "step": 6038 }, { "epoch": 0.010709419235641799, "grad_norm": 0.314453125, "learning_rate": 0.0019875360020471565, "loss": 0.2089, "step": 6040 }, { "epoch": 0.010712965400951614, "grad_norm": 0.3984375, "learning_rate": 0.0019875261289817887, "loss": 0.225, "step": 6042 }, { "epoch": 0.010716511566261428, "grad_norm": 0.578125, "learning_rate": 0.0019875162520348972, "loss": 0.2282, "step": 6044 }, { "epoch": 0.010720057731571244, "grad_norm": 0.71875, "learning_rate": 0.001987506371206525, "loss": 0.2575, "step": 6046 }, { "epoch": 0.010723603896881059, "grad_norm": 0.322265625, "learning_rate": 0.0019874964864967162, "loss": 0.2463, "step": 6048 }, { "epoch": 0.010727150062190874, "grad_norm": 0.578125, "learning_rate": 0.001987486597905513, "loss": 0.3057, "step": 6050 }, { "epoch": 0.01073069622750069, "grad_norm": 0.298828125, "learning_rate": 0.0019874767054329583, "loss": 0.259, "step": 6052 }, { "epoch": 0.010734242392810504, "grad_norm": 0.224609375, "learning_rate": 0.0019874668090790965, "loss": 0.2948, "step": 6054 }, { "epoch": 0.010737788558120319, "grad_norm": 0.474609375, "learning_rate": 0.0019874569088439704, "loss": 0.3562, "step": 6056 }, { "epoch": 0.010741334723430135, "grad_norm": 0.51171875, "learning_rate": 0.001987447004727623, "loss": 0.286, "step": 6058 }, { "epoch": 0.01074488088873995, "grad_norm": 1.0390625, "learning_rate": 0.001987437096730098, "loss": 0.4945, "step": 6060 }, { "epoch": 0.010748427054049764, "grad_norm": 0.466796875, "learning_rate": 0.001987427184851439, "loss": 0.2475, "step": 6062 }, { "epoch": 0.01075197321935958, "grad_norm": 0.60546875, "learning_rate": 0.0019874172690916886, "loss": 0.2243, "step": 6064 }, { "epoch": 0.010755519384669395, "grad_norm": 0.30859375, "learning_rate": 0.0019874073494508906, "loss": 0.3817, "step": 6066 }, { "epoch": 0.010759065549979211, "grad_norm": 1.015625, "learning_rate": 0.001987397425929088, "loss": 0.3127, "step": 6068 }, { "epoch": 0.010762611715289026, "grad_norm": 1.0703125, "learning_rate": 0.001987387498526325, "loss": 0.3228, "step": 6070 }, { "epoch": 0.01076615788059884, "grad_norm": 0.45703125, "learning_rate": 0.0019873775672426446, "loss": 0.2621, "step": 6072 }, { "epoch": 0.010769704045908657, "grad_norm": 0.48828125, "learning_rate": 0.00198736763207809, "loss": 0.2767, "step": 6074 }, { "epoch": 0.010773250211218471, "grad_norm": 0.6875, "learning_rate": 0.0019873576930327045, "loss": 0.2513, "step": 6076 }, { "epoch": 0.010776796376528286, "grad_norm": 1.03125, "learning_rate": 0.0019873477501065324, "loss": 0.3358, "step": 6078 }, { "epoch": 0.010780342541838102, "grad_norm": 0.734375, "learning_rate": 0.0019873378032996165, "loss": 0.2464, "step": 6080 }, { "epoch": 0.010783888707147917, "grad_norm": 0.4453125, "learning_rate": 0.0019873278526120006, "loss": 0.3333, "step": 6082 }, { "epoch": 0.010787434872457731, "grad_norm": 0.75, "learning_rate": 0.0019873178980437272, "loss": 0.2762, "step": 6084 }, { "epoch": 0.010790981037767548, "grad_norm": 0.48046875, "learning_rate": 0.001987307939594842, "loss": 0.2529, "step": 6086 }, { "epoch": 0.010794527203077362, "grad_norm": 0.41015625, "learning_rate": 0.0019872979772653865, "loss": 0.2608, "step": 6088 }, { "epoch": 0.010798073368387177, "grad_norm": 0.39453125, "learning_rate": 0.001987288011055405, "loss": 0.3697, "step": 6090 }, { "epoch": 0.010801619533696993, "grad_norm": 2.671875, "learning_rate": 0.0019872780409649414, "loss": 0.4219, "step": 6092 }, { "epoch": 0.010805165699006808, "grad_norm": 1.5234375, "learning_rate": 0.001987268066994039, "loss": 0.3257, "step": 6094 }, { "epoch": 0.010808711864316622, "grad_norm": 0.26171875, "learning_rate": 0.001987258089142742, "loss": 0.2178, "step": 6096 }, { "epoch": 0.010812258029626438, "grad_norm": 0.2099609375, "learning_rate": 0.0019872481074110927, "loss": 0.2763, "step": 6098 }, { "epoch": 0.010815804194936253, "grad_norm": 0.48046875, "learning_rate": 0.001987238121799136, "loss": 0.2948, "step": 6100 }, { "epoch": 0.01081935036024607, "grad_norm": 0.4921875, "learning_rate": 0.001987228132306915, "loss": 0.4501, "step": 6102 }, { "epoch": 0.010822896525555884, "grad_norm": 1.0625, "learning_rate": 0.0019872181389344735, "loss": 0.3309, "step": 6104 }, { "epoch": 0.010826442690865698, "grad_norm": 5.03125, "learning_rate": 0.0019872081416818553, "loss": 0.4552, "step": 6106 }, { "epoch": 0.010829988856175515, "grad_norm": 1.21875, "learning_rate": 0.001987198140549104, "loss": 0.2493, "step": 6108 }, { "epoch": 0.01083353502148533, "grad_norm": 0.400390625, "learning_rate": 0.001987188135536263, "loss": 0.2925, "step": 6110 }, { "epoch": 0.010837081186795144, "grad_norm": 0.390625, "learning_rate": 0.0019871781266433772, "loss": 0.2401, "step": 6112 }, { "epoch": 0.01084062735210496, "grad_norm": 0.63671875, "learning_rate": 0.001987168113870489, "loss": 0.2624, "step": 6114 }, { "epoch": 0.010844173517414775, "grad_norm": 0.78125, "learning_rate": 0.001987158097217643, "loss": 0.296, "step": 6116 }, { "epoch": 0.010847719682724589, "grad_norm": 0.24609375, "learning_rate": 0.0019871480766848826, "loss": 0.2174, "step": 6118 }, { "epoch": 0.010851265848034405, "grad_norm": 0.341796875, "learning_rate": 0.0019871380522722523, "loss": 0.2534, "step": 6120 }, { "epoch": 0.01085481201334422, "grad_norm": 1.015625, "learning_rate": 0.001987128023979795, "loss": 0.2794, "step": 6122 }, { "epoch": 0.010858358178654035, "grad_norm": 0.34375, "learning_rate": 0.0019871179918075554, "loss": 0.2548, "step": 6124 }, { "epoch": 0.01086190434396385, "grad_norm": 0.47265625, "learning_rate": 0.001987107955755577, "loss": 0.2993, "step": 6126 }, { "epoch": 0.010865450509273665, "grad_norm": 0.4765625, "learning_rate": 0.001987097915823903, "loss": 0.2443, "step": 6128 }, { "epoch": 0.01086899667458348, "grad_norm": 0.3125, "learning_rate": 0.0019870878720125787, "loss": 0.2306, "step": 6130 }, { "epoch": 0.010872542839893296, "grad_norm": 0.4765625, "learning_rate": 0.001987077824321647, "loss": 0.2146, "step": 6132 }, { "epoch": 0.01087608900520311, "grad_norm": 5.875, "learning_rate": 0.0019870677727511525, "loss": 0.3652, "step": 6134 }, { "epoch": 0.010879635170512927, "grad_norm": 1.203125, "learning_rate": 0.001987057717301138, "loss": 0.2946, "step": 6136 }, { "epoch": 0.010883181335822742, "grad_norm": 0.4921875, "learning_rate": 0.0019870476579716494, "loss": 0.264, "step": 6138 }, { "epoch": 0.010886727501132556, "grad_norm": 0.25390625, "learning_rate": 0.001987037594762729, "loss": 0.2724, "step": 6140 }, { "epoch": 0.010890273666442372, "grad_norm": 0.380859375, "learning_rate": 0.0019870275276744217, "loss": 0.2383, "step": 6142 }, { "epoch": 0.010893819831752187, "grad_norm": 0.625, "learning_rate": 0.0019870174567067716, "loss": 0.2365, "step": 6144 }, { "epoch": 0.010897365997062002, "grad_norm": 0.37890625, "learning_rate": 0.0019870073818598214, "loss": 0.2421, "step": 6146 }, { "epoch": 0.010900912162371818, "grad_norm": 0.65234375, "learning_rate": 0.0019869973031336166, "loss": 0.2806, "step": 6148 }, { "epoch": 0.010904458327681632, "grad_norm": 0.77734375, "learning_rate": 0.001986987220528201, "loss": 0.3103, "step": 6150 }, { "epoch": 0.010908004492991447, "grad_norm": 1.0078125, "learning_rate": 0.0019869771340436187, "loss": 0.2912, "step": 6152 }, { "epoch": 0.010911550658301263, "grad_norm": 1.25, "learning_rate": 0.001986967043679913, "loss": 0.3896, "step": 6154 }, { "epoch": 0.010915096823611078, "grad_norm": 1.59375, "learning_rate": 0.001986956949437129, "loss": 0.4143, "step": 6156 }, { "epoch": 0.010918642988920892, "grad_norm": 0.29296875, "learning_rate": 0.0019869468513153106, "loss": 0.2488, "step": 6158 }, { "epoch": 0.010922189154230709, "grad_norm": 1.1328125, "learning_rate": 0.001986936749314502, "loss": 0.2611, "step": 6160 }, { "epoch": 0.010925735319540523, "grad_norm": 0.435546875, "learning_rate": 0.001986926643434747, "loss": 0.2172, "step": 6162 }, { "epoch": 0.010929281484850338, "grad_norm": 0.67578125, "learning_rate": 0.00198691653367609, "loss": 0.3055, "step": 6164 }, { "epoch": 0.010932827650160154, "grad_norm": 0.416015625, "learning_rate": 0.001986906420038575, "loss": 0.2987, "step": 6166 }, { "epoch": 0.010936373815469969, "grad_norm": 0.796875, "learning_rate": 0.001986896302522247, "loss": 0.2669, "step": 6168 }, { "epoch": 0.010939919980779785, "grad_norm": 0.4765625, "learning_rate": 0.0019868861811271495, "loss": 0.2294, "step": 6170 }, { "epoch": 0.0109434661460896, "grad_norm": 0.5234375, "learning_rate": 0.001986876055853327, "loss": 0.2986, "step": 6172 }, { "epoch": 0.010947012311399414, "grad_norm": 0.49609375, "learning_rate": 0.001986865926700824, "loss": 0.2823, "step": 6174 }, { "epoch": 0.01095055847670923, "grad_norm": 0.48828125, "learning_rate": 0.0019868557936696847, "loss": 0.2529, "step": 6176 }, { "epoch": 0.010954104642019045, "grad_norm": 0.7578125, "learning_rate": 0.001986845656759953, "loss": 0.2319, "step": 6178 }, { "epoch": 0.01095765080732886, "grad_norm": 0.42578125, "learning_rate": 0.0019868355159716735, "loss": 0.2275, "step": 6180 }, { "epoch": 0.010961196972638676, "grad_norm": 0.298828125, "learning_rate": 0.0019868253713048907, "loss": 0.1985, "step": 6182 }, { "epoch": 0.01096474313794849, "grad_norm": 0.5234375, "learning_rate": 0.001986815222759649, "loss": 0.2351, "step": 6184 }, { "epoch": 0.010968289303258305, "grad_norm": 0.3671875, "learning_rate": 0.001986805070335992, "loss": 0.2689, "step": 6186 }, { "epoch": 0.010971835468568121, "grad_norm": 0.259765625, "learning_rate": 0.0019867949140339658, "loss": 0.2128, "step": 6188 }, { "epoch": 0.010975381633877936, "grad_norm": 0.42578125, "learning_rate": 0.001986784753853613, "loss": 0.2121, "step": 6190 }, { "epoch": 0.01097892779918775, "grad_norm": 0.408203125, "learning_rate": 0.001986774589794979, "loss": 0.1796, "step": 6192 }, { "epoch": 0.010982473964497566, "grad_norm": 0.8984375, "learning_rate": 0.001986764421858108, "loss": 0.3173, "step": 6194 }, { "epoch": 0.010986020129807381, "grad_norm": 0.294921875, "learning_rate": 0.0019867542500430447, "loss": 0.2264, "step": 6196 }, { "epoch": 0.010989566295117196, "grad_norm": 2.171875, "learning_rate": 0.001986744074349833, "loss": 0.3214, "step": 6198 }, { "epoch": 0.010993112460427012, "grad_norm": 0.41796875, "learning_rate": 0.0019867338947785183, "loss": 0.2817, "step": 6200 }, { "epoch": 0.010996658625736826, "grad_norm": 0.84765625, "learning_rate": 0.001986723711329144, "loss": 0.206, "step": 6202 }, { "epoch": 0.011000204791046643, "grad_norm": 1.015625, "learning_rate": 0.001986713524001756, "loss": 0.3411, "step": 6204 }, { "epoch": 0.011003750956356457, "grad_norm": 0.515625, "learning_rate": 0.0019867033327963975, "loss": 0.3907, "step": 6206 }, { "epoch": 0.011007297121666272, "grad_norm": 1.5078125, "learning_rate": 0.001986693137713114, "loss": 0.4127, "step": 6208 }, { "epoch": 0.011010843286976088, "grad_norm": 0.482421875, "learning_rate": 0.0019866829387519495, "loss": 0.2162, "step": 6210 }, { "epoch": 0.011014389452285903, "grad_norm": 2.1875, "learning_rate": 0.001986672735912949, "loss": 0.4836, "step": 6212 }, { "epoch": 0.011017935617595717, "grad_norm": 0.9609375, "learning_rate": 0.001986662529196157, "loss": 0.2363, "step": 6214 }, { "epoch": 0.011021481782905533, "grad_norm": 1.1875, "learning_rate": 0.0019866523186016184, "loss": 0.6834, "step": 6216 }, { "epoch": 0.011025027948215348, "grad_norm": 0.45703125, "learning_rate": 0.0019866421041293773, "loss": 0.2378, "step": 6218 }, { "epoch": 0.011028574113525163, "grad_norm": 0.59765625, "learning_rate": 0.001986631885779479, "loss": 0.2674, "step": 6220 }, { "epoch": 0.011032120278834979, "grad_norm": 0.349609375, "learning_rate": 0.0019866216635519673, "loss": 0.3375, "step": 6222 }, { "epoch": 0.011035666444144793, "grad_norm": 0.3125, "learning_rate": 0.001986611437446888, "loss": 0.4332, "step": 6224 }, { "epoch": 0.011039212609454608, "grad_norm": 0.33984375, "learning_rate": 0.0019866012074642846, "loss": 0.3157, "step": 6226 }, { "epoch": 0.011042758774764424, "grad_norm": 0.345703125, "learning_rate": 0.001986590973604203, "loss": 0.2539, "step": 6228 }, { "epoch": 0.011046304940074239, "grad_norm": 0.2451171875, "learning_rate": 0.001986580735866688, "loss": 0.257, "step": 6230 }, { "epoch": 0.011049851105384053, "grad_norm": 0.31640625, "learning_rate": 0.0019865704942517827, "loss": 0.244, "step": 6232 }, { "epoch": 0.01105339727069387, "grad_norm": 0.40234375, "learning_rate": 0.001986560248759534, "loss": 0.2814, "step": 6234 }, { "epoch": 0.011056943436003684, "grad_norm": 0.427734375, "learning_rate": 0.001986549999389985, "loss": 0.2961, "step": 6236 }, { "epoch": 0.0110604896013135, "grad_norm": 0.5703125, "learning_rate": 0.0019865397461431814, "loss": 0.3519, "step": 6238 }, { "epoch": 0.011064035766623315, "grad_norm": 0.1689453125, "learning_rate": 0.0019865294890191684, "loss": 0.1827, "step": 6240 }, { "epoch": 0.01106758193193313, "grad_norm": 0.283203125, "learning_rate": 0.00198651922801799, "loss": 0.2183, "step": 6242 }, { "epoch": 0.011071128097242946, "grad_norm": 1.4609375, "learning_rate": 0.0019865089631396914, "loss": 0.3905, "step": 6244 }, { "epoch": 0.01107467426255276, "grad_norm": 0.4765625, "learning_rate": 0.0019864986943843176, "loss": 0.2733, "step": 6246 }, { "epoch": 0.011078220427862575, "grad_norm": 0.435546875, "learning_rate": 0.0019864884217519136, "loss": 0.2972, "step": 6248 }, { "epoch": 0.011081766593172391, "grad_norm": 0.478515625, "learning_rate": 0.001986478145242524, "loss": 0.329, "step": 6250 }, { "epoch": 0.011085312758482206, "grad_norm": 0.8203125, "learning_rate": 0.0019864678648561945, "loss": 0.363, "step": 6252 }, { "epoch": 0.01108885892379202, "grad_norm": 0.70703125, "learning_rate": 0.0019864575805929687, "loss": 0.3268, "step": 6254 }, { "epoch": 0.011092405089101837, "grad_norm": 0.59765625, "learning_rate": 0.0019864472924528928, "loss": 0.2758, "step": 6256 }, { "epoch": 0.011095951254411651, "grad_norm": 1.3359375, "learning_rate": 0.0019864370004360112, "loss": 0.2462, "step": 6258 }, { "epoch": 0.011099497419721466, "grad_norm": 0.98828125, "learning_rate": 0.0019864267045423692, "loss": 0.2855, "step": 6260 }, { "epoch": 0.011103043585031282, "grad_norm": 0.3515625, "learning_rate": 0.0019864164047720114, "loss": 0.289, "step": 6262 }, { "epoch": 0.011106589750341097, "grad_norm": 0.625, "learning_rate": 0.0019864061011249834, "loss": 0.6329, "step": 6264 }, { "epoch": 0.011110135915650911, "grad_norm": 0.392578125, "learning_rate": 0.00198639579360133, "loss": 0.241, "step": 6266 }, { "epoch": 0.011113682080960727, "grad_norm": 0.26171875, "learning_rate": 0.001986385482201096, "loss": 0.236, "step": 6268 }, { "epoch": 0.011117228246270542, "grad_norm": 1.28125, "learning_rate": 0.001986375166924327, "loss": 0.314, "step": 6270 }, { "epoch": 0.011120774411580358, "grad_norm": 0.365234375, "learning_rate": 0.001986364847771068, "loss": 0.2481, "step": 6272 }, { "epoch": 0.011124320576890173, "grad_norm": 0.369140625, "learning_rate": 0.0019863545247413637, "loss": 0.2299, "step": 6274 }, { "epoch": 0.011127866742199987, "grad_norm": 0.87890625, "learning_rate": 0.00198634419783526, "loss": 0.2389, "step": 6276 }, { "epoch": 0.011131412907509804, "grad_norm": 0.3359375, "learning_rate": 0.0019863338670528014, "loss": 0.2651, "step": 6278 }, { "epoch": 0.011134959072819618, "grad_norm": 4.59375, "learning_rate": 0.001986323532394033, "loss": 0.2686, "step": 6280 }, { "epoch": 0.011138505238129433, "grad_norm": 0.306640625, "learning_rate": 0.0019863131938590004, "loss": 0.2385, "step": 6282 }, { "epoch": 0.011142051403439249, "grad_norm": 0.3046875, "learning_rate": 0.0019863028514477492, "loss": 0.3189, "step": 6284 }, { "epoch": 0.011145597568749064, "grad_norm": 1.1796875, "learning_rate": 0.001986292505160324, "loss": 0.3359, "step": 6286 }, { "epoch": 0.011149143734058878, "grad_norm": 0.8671875, "learning_rate": 0.00198628215499677, "loss": 0.2666, "step": 6288 }, { "epoch": 0.011152689899368694, "grad_norm": 0.625, "learning_rate": 0.0019862718009571326, "loss": 0.3466, "step": 6290 }, { "epoch": 0.011156236064678509, "grad_norm": 0.4375, "learning_rate": 0.0019862614430414573, "loss": 0.2416, "step": 6292 }, { "epoch": 0.011159782229988324, "grad_norm": 0.30859375, "learning_rate": 0.001986251081249789, "loss": 0.209, "step": 6294 }, { "epoch": 0.01116332839529814, "grad_norm": 0.6484375, "learning_rate": 0.0019862407155821736, "loss": 0.259, "step": 6296 }, { "epoch": 0.011166874560607954, "grad_norm": 1.78125, "learning_rate": 0.001986230346038656, "loss": 0.3313, "step": 6298 }, { "epoch": 0.011170420725917769, "grad_norm": 0.6953125, "learning_rate": 0.0019862199726192816, "loss": 0.2995, "step": 6300 }, { "epoch": 0.011173966891227585, "grad_norm": 0.3046875, "learning_rate": 0.001986209595324096, "loss": 0.2362, "step": 6302 }, { "epoch": 0.0111775130565374, "grad_norm": 1.2890625, "learning_rate": 0.001986199214153144, "loss": 0.291, "step": 6304 }, { "epoch": 0.011181059221847216, "grad_norm": 0.79296875, "learning_rate": 0.0019861888291064717, "loss": 0.2559, "step": 6306 }, { "epoch": 0.01118460538715703, "grad_norm": 0.2060546875, "learning_rate": 0.0019861784401841243, "loss": 0.2296, "step": 6308 }, { "epoch": 0.011188151552466845, "grad_norm": 0.439453125, "learning_rate": 0.001986168047386147, "loss": 0.2676, "step": 6310 }, { "epoch": 0.011191697717776661, "grad_norm": 1.6953125, "learning_rate": 0.0019861576507125855, "loss": 0.5058, "step": 6312 }, { "epoch": 0.011195243883086476, "grad_norm": 0.51171875, "learning_rate": 0.001986147250163485, "loss": 0.2751, "step": 6314 }, { "epoch": 0.01119879004839629, "grad_norm": 0.59765625, "learning_rate": 0.0019861368457388916, "loss": 0.3495, "step": 6316 }, { "epoch": 0.011202336213706107, "grad_norm": 0.2353515625, "learning_rate": 0.00198612643743885, "loss": 0.354, "step": 6318 }, { "epoch": 0.011205882379015921, "grad_norm": 0.484375, "learning_rate": 0.001986116025263406, "loss": 0.2894, "step": 6320 }, { "epoch": 0.011209428544325736, "grad_norm": 0.45703125, "learning_rate": 0.0019861056092126054, "loss": 0.2521, "step": 6322 }, { "epoch": 0.011212974709635552, "grad_norm": 0.69140625, "learning_rate": 0.0019860951892864934, "loss": 0.2555, "step": 6324 }, { "epoch": 0.011216520874945367, "grad_norm": 0.8359375, "learning_rate": 0.001986084765485116, "loss": 0.3349, "step": 6326 }, { "epoch": 0.011220067040255181, "grad_norm": 0.30078125, "learning_rate": 0.0019860743378085186, "loss": 0.2099, "step": 6328 }, { "epoch": 0.011223613205564998, "grad_norm": 0.8125, "learning_rate": 0.0019860639062567464, "loss": 0.24, "step": 6330 }, { "epoch": 0.011227159370874812, "grad_norm": 0.51171875, "learning_rate": 0.001986053470829846, "loss": 0.2166, "step": 6332 }, { "epoch": 0.011230705536184627, "grad_norm": 0.68359375, "learning_rate": 0.0019860430315278618, "loss": 0.2391, "step": 6334 }, { "epoch": 0.011234251701494443, "grad_norm": 0.56640625, "learning_rate": 0.00198603258835084, "loss": 0.3065, "step": 6336 }, { "epoch": 0.011237797866804258, "grad_norm": 1.2734375, "learning_rate": 0.0019860221412988264, "loss": 0.3638, "step": 6338 }, { "epoch": 0.011241344032114074, "grad_norm": 0.6875, "learning_rate": 0.0019860116903718666, "loss": 0.2502, "step": 6340 }, { "epoch": 0.011244890197423888, "grad_norm": 0.80859375, "learning_rate": 0.0019860012355700065, "loss": 0.2676, "step": 6342 }, { "epoch": 0.011248436362733703, "grad_norm": 0.91796875, "learning_rate": 0.001985990776893292, "loss": 0.2933, "step": 6344 }, { "epoch": 0.01125198252804352, "grad_norm": 0.70703125, "learning_rate": 0.001985980314341768, "loss": 0.2384, "step": 6346 }, { "epoch": 0.011255528693353334, "grad_norm": 0.4140625, "learning_rate": 0.0019859698479154806, "loss": 0.2574, "step": 6348 }, { "epoch": 0.011259074858663148, "grad_norm": 1.03125, "learning_rate": 0.001985959377614476, "loss": 0.255, "step": 6350 }, { "epoch": 0.011262621023972965, "grad_norm": 1.4296875, "learning_rate": 0.0019859489034387994, "loss": 0.5737, "step": 6352 }, { "epoch": 0.01126616718928278, "grad_norm": 0.384765625, "learning_rate": 0.0019859384253884975, "loss": 0.2619, "step": 6354 }, { "epoch": 0.011269713354592594, "grad_norm": 0.69921875, "learning_rate": 0.001985927943463615, "loss": 0.2124, "step": 6356 }, { "epoch": 0.01127325951990241, "grad_norm": 17.0, "learning_rate": 0.001985917457664199, "loss": 0.3274, "step": 6358 }, { "epoch": 0.011276805685212225, "grad_norm": 0.87890625, "learning_rate": 0.0019859069679902938, "loss": 0.2299, "step": 6360 }, { "epoch": 0.01128035185052204, "grad_norm": 1.15625, "learning_rate": 0.0019858964744419467, "loss": 0.2498, "step": 6362 }, { "epoch": 0.011283898015831855, "grad_norm": 2.28125, "learning_rate": 0.0019858859770192027, "loss": 0.3732, "step": 6364 }, { "epoch": 0.01128744418114167, "grad_norm": 0.53125, "learning_rate": 0.001985875475722108, "loss": 0.3462, "step": 6366 }, { "epoch": 0.011290990346451485, "grad_norm": 0.1943359375, "learning_rate": 0.0019858649705507088, "loss": 0.2156, "step": 6368 }, { "epoch": 0.0112945365117613, "grad_norm": 0.30078125, "learning_rate": 0.0019858544615050503, "loss": 0.3415, "step": 6370 }, { "epoch": 0.011298082677071115, "grad_norm": 1.46875, "learning_rate": 0.0019858439485851793, "loss": 0.2704, "step": 6372 }, { "epoch": 0.011301628842380932, "grad_norm": 0.42578125, "learning_rate": 0.0019858334317911413, "loss": 0.3162, "step": 6374 }, { "epoch": 0.011305175007690746, "grad_norm": 0.5703125, "learning_rate": 0.0019858229111229826, "loss": 0.2298, "step": 6376 }, { "epoch": 0.01130872117300056, "grad_norm": 0.396484375, "learning_rate": 0.0019858123865807487, "loss": 0.2279, "step": 6378 }, { "epoch": 0.011312267338310377, "grad_norm": 0.4609375, "learning_rate": 0.0019858018581644862, "loss": 0.3286, "step": 6380 }, { "epoch": 0.011315813503620192, "grad_norm": 0.53125, "learning_rate": 0.0019857913258742414, "loss": 0.4062, "step": 6382 }, { "epoch": 0.011319359668930006, "grad_norm": 0.55078125, "learning_rate": 0.0019857807897100594, "loss": 0.2201, "step": 6384 }, { "epoch": 0.011322905834239822, "grad_norm": 0.6171875, "learning_rate": 0.0019857702496719866, "loss": 0.2164, "step": 6386 }, { "epoch": 0.011326451999549637, "grad_norm": 0.1630859375, "learning_rate": 0.0019857597057600694, "loss": 0.217, "step": 6388 }, { "epoch": 0.011329998164859452, "grad_norm": 0.318359375, "learning_rate": 0.001985749157974354, "loss": 0.2746, "step": 6390 }, { "epoch": 0.011333544330169268, "grad_norm": 1.2109375, "learning_rate": 0.001985738606314886, "loss": 0.6119, "step": 6392 }, { "epoch": 0.011337090495479082, "grad_norm": 0.5, "learning_rate": 0.0019857280507817117, "loss": 0.2621, "step": 6394 }, { "epoch": 0.011340636660788897, "grad_norm": 0.99609375, "learning_rate": 0.0019857174913748775, "loss": 0.2558, "step": 6396 }, { "epoch": 0.011344182826098713, "grad_norm": 0.58984375, "learning_rate": 0.0019857069280944297, "loss": 0.279, "step": 6398 }, { "epoch": 0.011347728991408528, "grad_norm": 0.7421875, "learning_rate": 0.0019856963609404137, "loss": 0.2397, "step": 6400 }, { "epoch": 0.011351275156718342, "grad_norm": 0.318359375, "learning_rate": 0.001985685789912877, "loss": 0.1866, "step": 6402 }, { "epoch": 0.011354821322028159, "grad_norm": 0.4453125, "learning_rate": 0.0019856752150118648, "loss": 0.3086, "step": 6404 }, { "epoch": 0.011358367487337973, "grad_norm": 0.71875, "learning_rate": 0.0019856646362374237, "loss": 0.2461, "step": 6406 }, { "epoch": 0.01136191365264779, "grad_norm": 0.98828125, "learning_rate": 0.0019856540535896, "loss": 0.3312, "step": 6408 }, { "epoch": 0.011365459817957604, "grad_norm": 0.400390625, "learning_rate": 0.00198564346706844, "loss": 0.2237, "step": 6410 }, { "epoch": 0.011369005983267419, "grad_norm": 0.64453125, "learning_rate": 0.0019856328766739897, "loss": 0.3129, "step": 6412 }, { "epoch": 0.011372552148577235, "grad_norm": 0.400390625, "learning_rate": 0.0019856222824062957, "loss": 0.2855, "step": 6414 }, { "epoch": 0.01137609831388705, "grad_norm": 0.75390625, "learning_rate": 0.0019856116842654043, "loss": 0.2575, "step": 6416 }, { "epoch": 0.011379644479196864, "grad_norm": 1.3828125, "learning_rate": 0.0019856010822513616, "loss": 0.509, "step": 6418 }, { "epoch": 0.01138319064450668, "grad_norm": 1.0234375, "learning_rate": 0.0019855904763642143, "loss": 0.2234, "step": 6420 }, { "epoch": 0.011386736809816495, "grad_norm": 0.5859375, "learning_rate": 0.001985579866604009, "loss": 0.2209, "step": 6422 }, { "epoch": 0.01139028297512631, "grad_norm": 0.337890625, "learning_rate": 0.0019855692529707914, "loss": 0.3473, "step": 6424 }, { "epoch": 0.011393829140436126, "grad_norm": 0.41015625, "learning_rate": 0.0019855586354646086, "loss": 0.272, "step": 6426 }, { "epoch": 0.01139737530574594, "grad_norm": 0.38671875, "learning_rate": 0.0019855480140855064, "loss": 0.2868, "step": 6428 }, { "epoch": 0.011400921471055755, "grad_norm": 0.365234375, "learning_rate": 0.0019855373888335317, "loss": 0.3183, "step": 6430 }, { "epoch": 0.011404467636365571, "grad_norm": 0.53515625, "learning_rate": 0.001985526759708731, "loss": 0.2578, "step": 6432 }, { "epoch": 0.011408013801675386, "grad_norm": 0.67578125, "learning_rate": 0.0019855161267111504, "loss": 0.2703, "step": 6434 }, { "epoch": 0.0114115599669852, "grad_norm": 0.43359375, "learning_rate": 0.0019855054898408366, "loss": 0.2035, "step": 6436 }, { "epoch": 0.011415106132295016, "grad_norm": 0.32421875, "learning_rate": 0.0019854948490978363, "loss": 0.2835, "step": 6438 }, { "epoch": 0.011418652297604831, "grad_norm": 0.3984375, "learning_rate": 0.001985484204482196, "loss": 0.2202, "step": 6440 }, { "epoch": 0.011422198462914647, "grad_norm": 0.546875, "learning_rate": 0.001985473555993962, "loss": 0.2038, "step": 6442 }, { "epoch": 0.011425744628224462, "grad_norm": 0.3671875, "learning_rate": 0.001985462903633181, "loss": 0.2454, "step": 6444 }, { "epoch": 0.011429290793534276, "grad_norm": 0.78515625, "learning_rate": 0.0019854522473998996, "loss": 0.3056, "step": 6446 }, { "epoch": 0.011432836958844093, "grad_norm": 13.25, "learning_rate": 0.0019854415872941644, "loss": 0.3753, "step": 6448 }, { "epoch": 0.011436383124153907, "grad_norm": 0.482421875, "learning_rate": 0.001985430923316022, "loss": 0.2377, "step": 6450 }, { "epoch": 0.011439929289463722, "grad_norm": 0.453125, "learning_rate": 0.001985420255465519, "loss": 0.352, "step": 6452 }, { "epoch": 0.011443475454773538, "grad_norm": 0.43359375, "learning_rate": 0.0019854095837427022, "loss": 0.3862, "step": 6454 }, { "epoch": 0.011447021620083353, "grad_norm": 0.6640625, "learning_rate": 0.001985398908147618, "loss": 0.3506, "step": 6456 }, { "epoch": 0.011450567785393167, "grad_norm": 0.486328125, "learning_rate": 0.0019853882286803137, "loss": 0.2485, "step": 6458 }, { "epoch": 0.011454113950702983, "grad_norm": 0.408203125, "learning_rate": 0.001985377545340835, "loss": 0.2664, "step": 6460 }, { "epoch": 0.011457660116012798, "grad_norm": 1.046875, "learning_rate": 0.0019853668581292297, "loss": 0.2627, "step": 6462 }, { "epoch": 0.011461206281322613, "grad_norm": 1.734375, "learning_rate": 0.001985356167045544, "loss": 0.5401, "step": 6464 }, { "epoch": 0.011464752446632429, "grad_norm": 0.396484375, "learning_rate": 0.0019853454720898246, "loss": 0.2897, "step": 6466 }, { "epoch": 0.011468298611942243, "grad_norm": 0.7890625, "learning_rate": 0.0019853347732621185, "loss": 0.2578, "step": 6468 }, { "epoch": 0.011471844777252058, "grad_norm": 0.3671875, "learning_rate": 0.0019853240705624718, "loss": 0.3041, "step": 6470 }, { "epoch": 0.011475390942561874, "grad_norm": 1.7109375, "learning_rate": 0.0019853133639909327, "loss": 0.2205, "step": 6472 }, { "epoch": 0.011478937107871689, "grad_norm": 10.125, "learning_rate": 0.001985302653547547, "loss": 0.2979, "step": 6474 }, { "epoch": 0.011482483273181505, "grad_norm": 0.392578125, "learning_rate": 0.0019852919392323613, "loss": 0.2948, "step": 6476 }, { "epoch": 0.01148602943849132, "grad_norm": 0.27734375, "learning_rate": 0.001985281221045423, "loss": 0.3383, "step": 6478 }, { "epoch": 0.011489575603801134, "grad_norm": 6.125, "learning_rate": 0.001985270498986779, "loss": 0.5451, "step": 6480 }, { "epoch": 0.01149312176911095, "grad_norm": 2.015625, "learning_rate": 0.001985259773056476, "loss": 0.3665, "step": 6482 }, { "epoch": 0.011496667934420765, "grad_norm": 0.40234375, "learning_rate": 0.0019852490432545615, "loss": 0.2287, "step": 6484 }, { "epoch": 0.01150021409973058, "grad_norm": 0.8828125, "learning_rate": 0.001985238309581081, "loss": 0.2771, "step": 6486 }, { "epoch": 0.011503760265040396, "grad_norm": 0.494140625, "learning_rate": 0.001985227572036083, "loss": 0.2507, "step": 6488 }, { "epoch": 0.01150730643035021, "grad_norm": 1.0546875, "learning_rate": 0.0019852168306196136, "loss": 0.2873, "step": 6490 }, { "epoch": 0.011510852595660025, "grad_norm": 1.4609375, "learning_rate": 0.0019852060853317198, "loss": 0.3437, "step": 6492 }, { "epoch": 0.011514398760969841, "grad_norm": 0.37890625, "learning_rate": 0.001985195336172449, "loss": 0.2354, "step": 6494 }, { "epoch": 0.011517944926279656, "grad_norm": 0.26171875, "learning_rate": 0.001985184583141848, "loss": 0.2171, "step": 6496 }, { "epoch": 0.01152149109158947, "grad_norm": 3.046875, "learning_rate": 0.001985173826239964, "loss": 0.3967, "step": 6498 }, { "epoch": 0.011525037256899287, "grad_norm": 3.34375, "learning_rate": 0.001985163065466843, "loss": 0.6066, "step": 6500 }, { "epoch": 0.011528583422209101, "grad_norm": 1.375, "learning_rate": 0.001985152300822534, "loss": 0.3862, "step": 6502 }, { "epoch": 0.011532129587518916, "grad_norm": 0.39453125, "learning_rate": 0.0019851415323070823, "loss": 0.2662, "step": 6504 }, { "epoch": 0.011535675752828732, "grad_norm": 0.625, "learning_rate": 0.001985130759920536, "loss": 0.2981, "step": 6506 }, { "epoch": 0.011539221918138547, "grad_norm": 1.3203125, "learning_rate": 0.0019851199836629415, "loss": 0.2541, "step": 6508 }, { "epoch": 0.011542768083448363, "grad_norm": 2.515625, "learning_rate": 0.0019851092035343466, "loss": 0.2319, "step": 6510 }, { "epoch": 0.011546314248758177, "grad_norm": 1.015625, "learning_rate": 0.0019850984195347986, "loss": 0.3721, "step": 6512 }, { "epoch": 0.011549860414067992, "grad_norm": 0.42578125, "learning_rate": 0.0019850876316643436, "loss": 0.3427, "step": 6514 }, { "epoch": 0.011553406579377808, "grad_norm": 2.96875, "learning_rate": 0.0019850768399230297, "loss": 0.3732, "step": 6516 }, { "epoch": 0.011556952744687623, "grad_norm": 0.44140625, "learning_rate": 0.0019850660443109036, "loss": 0.3362, "step": 6518 }, { "epoch": 0.011560498909997437, "grad_norm": 0.6171875, "learning_rate": 0.001985055244828013, "loss": 0.3409, "step": 6520 }, { "epoch": 0.011564045075307254, "grad_norm": 0.7265625, "learning_rate": 0.001985044441474405, "loss": 0.3454, "step": 6522 }, { "epoch": 0.011567591240617068, "grad_norm": 0.306640625, "learning_rate": 0.001985033634250126, "loss": 0.3016, "step": 6524 }, { "epoch": 0.011571137405926883, "grad_norm": 0.3359375, "learning_rate": 0.0019850228231552245, "loss": 0.4201, "step": 6526 }, { "epoch": 0.011574683571236699, "grad_norm": 0.76953125, "learning_rate": 0.001985012008189747, "loss": 0.2919, "step": 6528 }, { "epoch": 0.011578229736546514, "grad_norm": 0.50390625, "learning_rate": 0.0019850011893537416, "loss": 0.3108, "step": 6530 }, { "epoch": 0.011581775901856328, "grad_norm": 0.66015625, "learning_rate": 0.0019849903666472545, "loss": 0.3685, "step": 6532 }, { "epoch": 0.011585322067166145, "grad_norm": 0.43359375, "learning_rate": 0.001984979540070334, "loss": 0.3112, "step": 6534 }, { "epoch": 0.011588868232475959, "grad_norm": 3.234375, "learning_rate": 0.0019849687096230267, "loss": 0.4082, "step": 6536 }, { "epoch": 0.011592414397785774, "grad_norm": 1.3515625, "learning_rate": 0.0019849578753053806, "loss": 0.6489, "step": 6538 }, { "epoch": 0.01159596056309559, "grad_norm": 1.0703125, "learning_rate": 0.0019849470371174427, "loss": 0.288, "step": 6540 }, { "epoch": 0.011599506728405404, "grad_norm": 0.68359375, "learning_rate": 0.0019849361950592605, "loss": 0.2855, "step": 6542 }, { "epoch": 0.01160305289371522, "grad_norm": 1.171875, "learning_rate": 0.0019849253491308816, "loss": 0.2629, "step": 6544 }, { "epoch": 0.011606599059025035, "grad_norm": 0.5703125, "learning_rate": 0.0019849144993323528, "loss": 0.1863, "step": 6546 }, { "epoch": 0.01161014522433485, "grad_norm": 0.54296875, "learning_rate": 0.0019849036456637222, "loss": 0.2295, "step": 6548 }, { "epoch": 0.011613691389644666, "grad_norm": 1.6328125, "learning_rate": 0.001984892788125037, "loss": 0.3621, "step": 6550 }, { "epoch": 0.01161723755495448, "grad_norm": 0.59765625, "learning_rate": 0.001984881926716345, "loss": 0.2898, "step": 6552 }, { "epoch": 0.011620783720264295, "grad_norm": 0.5546875, "learning_rate": 0.001984871061437693, "loss": 0.2619, "step": 6554 }, { "epoch": 0.011624329885574112, "grad_norm": 0.365234375, "learning_rate": 0.0019848601922891292, "loss": 0.2608, "step": 6556 }, { "epoch": 0.011627876050883926, "grad_norm": 0.625, "learning_rate": 0.0019848493192707014, "loss": 0.4518, "step": 6558 }, { "epoch": 0.01163142221619374, "grad_norm": 2.171875, "learning_rate": 0.0019848384423824562, "loss": 0.3408, "step": 6560 }, { "epoch": 0.011634968381503557, "grad_norm": 0.462890625, "learning_rate": 0.0019848275616244416, "loss": 0.2635, "step": 6562 }, { "epoch": 0.011638514546813371, "grad_norm": 0.482421875, "learning_rate": 0.001984816676996705, "loss": 0.2266, "step": 6564 }, { "epoch": 0.011642060712123186, "grad_norm": 0.41796875, "learning_rate": 0.0019848057884992946, "loss": 0.2707, "step": 6566 }, { "epoch": 0.011645606877433002, "grad_norm": 0.47265625, "learning_rate": 0.0019847948961322576, "loss": 0.3033, "step": 6568 }, { "epoch": 0.011649153042742817, "grad_norm": 0.416015625, "learning_rate": 0.001984783999895642, "loss": 0.3255, "step": 6570 }, { "epoch": 0.011652699208052631, "grad_norm": 0.515625, "learning_rate": 0.0019847730997894944, "loss": 0.3003, "step": 6572 }, { "epoch": 0.011656245373362448, "grad_norm": 0.63671875, "learning_rate": 0.0019847621958138635, "loss": 0.277, "step": 6574 }, { "epoch": 0.011659791538672262, "grad_norm": 0.32421875, "learning_rate": 0.0019847512879687967, "loss": 0.2551, "step": 6576 }, { "epoch": 0.011663337703982079, "grad_norm": 0.71875, "learning_rate": 0.001984740376254342, "loss": 0.2087, "step": 6578 }, { "epoch": 0.011666883869291893, "grad_norm": 1.3203125, "learning_rate": 0.0019847294606705466, "loss": 0.3965, "step": 6580 }, { "epoch": 0.011670430034601708, "grad_norm": 1.0078125, "learning_rate": 0.0019847185412174583, "loss": 0.3613, "step": 6582 }, { "epoch": 0.011673976199911524, "grad_norm": 0.4375, "learning_rate": 0.001984707617895125, "loss": 0.3577, "step": 6584 }, { "epoch": 0.011677522365221338, "grad_norm": 0.53515625, "learning_rate": 0.0019846966907035943, "loss": 0.1842, "step": 6586 }, { "epoch": 0.011681068530531153, "grad_norm": 0.82421875, "learning_rate": 0.0019846857596429145, "loss": 0.3677, "step": 6588 }, { "epoch": 0.01168461469584097, "grad_norm": 0.310546875, "learning_rate": 0.0019846748247131334, "loss": 0.2998, "step": 6590 }, { "epoch": 0.011688160861150784, "grad_norm": 0.515625, "learning_rate": 0.001984663885914298, "loss": 0.2502, "step": 6592 }, { "epoch": 0.011691707026460598, "grad_norm": 1.3046875, "learning_rate": 0.001984652943246457, "loss": 0.3343, "step": 6594 }, { "epoch": 0.011695253191770415, "grad_norm": 1.4921875, "learning_rate": 0.001984641996709657, "loss": 0.3007, "step": 6596 }, { "epoch": 0.01169879935708023, "grad_norm": 2.171875, "learning_rate": 0.001984631046303948, "loss": 0.4247, "step": 6598 }, { "epoch": 0.011702345522390044, "grad_norm": 0.51953125, "learning_rate": 0.0019846200920293757, "loss": 0.207, "step": 6600 }, { "epoch": 0.01170589168769986, "grad_norm": 5.0625, "learning_rate": 0.0019846091338859896, "loss": 0.5443, "step": 6602 }, { "epoch": 0.011709437853009675, "grad_norm": 0.71484375, "learning_rate": 0.0019845981718738365, "loss": 0.2723, "step": 6604 }, { "epoch": 0.01171298401831949, "grad_norm": 2.15625, "learning_rate": 0.0019845872059929648, "loss": 0.2741, "step": 6606 }, { "epoch": 0.011716530183629306, "grad_norm": 1.875, "learning_rate": 0.001984576236243423, "loss": 0.2785, "step": 6608 }, { "epoch": 0.01172007634893912, "grad_norm": 0.41015625, "learning_rate": 0.0019845652626252585, "loss": 0.2408, "step": 6610 }, { "epoch": 0.011723622514248936, "grad_norm": 0.51171875, "learning_rate": 0.001984554285138519, "loss": 0.357, "step": 6612 }, { "epoch": 0.011727168679558751, "grad_norm": 0.515625, "learning_rate": 0.001984543303783253, "loss": 0.245, "step": 6614 }, { "epoch": 0.011730714844868565, "grad_norm": 0.29296875, "learning_rate": 0.0019845323185595084, "loss": 0.1853, "step": 6616 }, { "epoch": 0.011734261010178382, "grad_norm": 0.859375, "learning_rate": 0.001984521329467333, "loss": 0.2566, "step": 6618 }, { "epoch": 0.011737807175488196, "grad_norm": 0.76953125, "learning_rate": 0.0019845103365067757, "loss": 0.2525, "step": 6620 }, { "epoch": 0.01174135334079801, "grad_norm": 0.61328125, "learning_rate": 0.0019844993396778833, "loss": 0.2193, "step": 6622 }, { "epoch": 0.011744899506107827, "grad_norm": 0.474609375, "learning_rate": 0.001984488338980705, "loss": 0.2799, "step": 6624 }, { "epoch": 0.011748445671417642, "grad_norm": 0.5859375, "learning_rate": 0.0019844773344152885, "loss": 0.2092, "step": 6626 }, { "epoch": 0.011751991836727456, "grad_norm": 0.419921875, "learning_rate": 0.0019844663259816816, "loss": 0.2723, "step": 6628 }, { "epoch": 0.011755538002037273, "grad_norm": 0.6328125, "learning_rate": 0.001984455313679933, "loss": 0.2936, "step": 6630 }, { "epoch": 0.011759084167347087, "grad_norm": 0.890625, "learning_rate": 0.0019844442975100905, "loss": 0.2109, "step": 6632 }, { "epoch": 0.011762630332656902, "grad_norm": 1.28125, "learning_rate": 0.0019844332774722026, "loss": 0.3474, "step": 6634 }, { "epoch": 0.011766176497966718, "grad_norm": 0.4296875, "learning_rate": 0.001984422253566317, "loss": 0.3077, "step": 6636 }, { "epoch": 0.011769722663276532, "grad_norm": 0.40234375, "learning_rate": 0.001984411225792482, "loss": 0.1891, "step": 6638 }, { "epoch": 0.011773268828586347, "grad_norm": 1.6015625, "learning_rate": 0.001984400194150747, "loss": 0.2661, "step": 6640 }, { "epoch": 0.011776814993896163, "grad_norm": 0.349609375, "learning_rate": 0.0019843891586411584, "loss": 0.2582, "step": 6642 }, { "epoch": 0.011780361159205978, "grad_norm": 0.48828125, "learning_rate": 0.0019843781192637657, "loss": 0.2815, "step": 6644 }, { "epoch": 0.011783907324515792, "grad_norm": 0.341796875, "learning_rate": 0.0019843670760186167, "loss": 0.2513, "step": 6646 }, { "epoch": 0.011787453489825609, "grad_norm": 1.671875, "learning_rate": 0.0019843560289057594, "loss": 0.3517, "step": 6648 }, { "epoch": 0.011790999655135423, "grad_norm": 1.0625, "learning_rate": 0.001984344977925243, "loss": 0.217, "step": 6650 }, { "epoch": 0.01179454582044524, "grad_norm": 0.255859375, "learning_rate": 0.001984333923077115, "loss": 0.2576, "step": 6652 }, { "epoch": 0.011798091985755054, "grad_norm": 0.625, "learning_rate": 0.001984322864361424, "loss": 0.2353, "step": 6654 }, { "epoch": 0.011801638151064869, "grad_norm": 0.408203125, "learning_rate": 0.0019843118017782192, "loss": 0.2349, "step": 6656 }, { "epoch": 0.011805184316374685, "grad_norm": 0.25390625, "learning_rate": 0.001984300735327548, "loss": 0.2386, "step": 6658 }, { "epoch": 0.0118087304816845, "grad_norm": 0.4140625, "learning_rate": 0.0019842896650094587, "loss": 0.2554, "step": 6660 }, { "epoch": 0.011812276646994314, "grad_norm": 0.255859375, "learning_rate": 0.0019842785908240003, "loss": 0.239, "step": 6662 }, { "epoch": 0.01181582281230413, "grad_norm": 0.28125, "learning_rate": 0.001984267512771221, "loss": 0.2206, "step": 6664 }, { "epoch": 0.011819368977613945, "grad_norm": 1.90625, "learning_rate": 0.001984256430851169, "loss": 0.3458, "step": 6666 }, { "epoch": 0.01182291514292376, "grad_norm": 0.259765625, "learning_rate": 0.0019842453450638936, "loss": 0.2103, "step": 6668 }, { "epoch": 0.011826461308233576, "grad_norm": 0.8984375, "learning_rate": 0.0019842342554094424, "loss": 0.3074, "step": 6670 }, { "epoch": 0.01183000747354339, "grad_norm": 0.7421875, "learning_rate": 0.001984223161887864, "loss": 0.2499, "step": 6672 }, { "epoch": 0.011833553638853205, "grad_norm": 0.86328125, "learning_rate": 0.001984212064499207, "loss": 0.271, "step": 6674 }, { "epoch": 0.011837099804163021, "grad_norm": 0.435546875, "learning_rate": 0.0019842009632435203, "loss": 0.4066, "step": 6676 }, { "epoch": 0.011840645969472836, "grad_norm": 0.443359375, "learning_rate": 0.0019841898581208525, "loss": 0.2566, "step": 6678 }, { "epoch": 0.01184419213478265, "grad_norm": 0.85546875, "learning_rate": 0.0019841787491312515, "loss": 0.265, "step": 6680 }, { "epoch": 0.011847738300092467, "grad_norm": 1.0390625, "learning_rate": 0.0019841676362747657, "loss": 0.2992, "step": 6682 }, { "epoch": 0.011851284465402281, "grad_norm": 0.54296875, "learning_rate": 0.001984156519551445, "loss": 0.3472, "step": 6684 }, { "epoch": 0.011854830630712097, "grad_norm": 0.396484375, "learning_rate": 0.001984145398961337, "loss": 0.2074, "step": 6686 }, { "epoch": 0.011858376796021912, "grad_norm": 0.265625, "learning_rate": 0.001984134274504491, "loss": 0.2638, "step": 6688 }, { "epoch": 0.011861922961331726, "grad_norm": 0.28125, "learning_rate": 0.001984123146180955, "loss": 0.2428, "step": 6690 }, { "epoch": 0.011865469126641543, "grad_norm": 0.38671875, "learning_rate": 0.0019841120139907774, "loss": 0.2761, "step": 6692 }, { "epoch": 0.011869015291951357, "grad_norm": 0.49609375, "learning_rate": 0.001984100877934008, "loss": 0.3499, "step": 6694 }, { "epoch": 0.011872561457261172, "grad_norm": 0.50390625, "learning_rate": 0.0019840897380106947, "loss": 0.2732, "step": 6696 }, { "epoch": 0.011876107622570988, "grad_norm": 0.8203125, "learning_rate": 0.0019840785942208867, "loss": 0.2414, "step": 6698 }, { "epoch": 0.011879653787880803, "grad_norm": 0.7734375, "learning_rate": 0.001984067446564632, "loss": 0.2762, "step": 6700 }, { "epoch": 0.011883199953190617, "grad_norm": 0.42578125, "learning_rate": 0.00198405629504198, "loss": 0.2606, "step": 6702 }, { "epoch": 0.011886746118500434, "grad_norm": 0.6875, "learning_rate": 0.0019840451396529795, "loss": 0.2388, "step": 6704 }, { "epoch": 0.011890292283810248, "grad_norm": 0.51953125, "learning_rate": 0.001984033980397679, "loss": 0.2446, "step": 6706 }, { "epoch": 0.011893838449120063, "grad_norm": 0.36328125, "learning_rate": 0.0019840228172761268, "loss": 0.3288, "step": 6708 }, { "epoch": 0.011897384614429879, "grad_norm": 0.330078125, "learning_rate": 0.001984011650288373, "loss": 0.1916, "step": 6710 }, { "epoch": 0.011900930779739693, "grad_norm": 0.4140625, "learning_rate": 0.0019840004794344653, "loss": 0.2786, "step": 6712 }, { "epoch": 0.011904476945049508, "grad_norm": 0.55078125, "learning_rate": 0.001983989304714453, "loss": 0.2285, "step": 6714 }, { "epoch": 0.011908023110359324, "grad_norm": 1.3125, "learning_rate": 0.0019839781261283855, "loss": 0.2872, "step": 6716 }, { "epoch": 0.011911569275669139, "grad_norm": 0.6328125, "learning_rate": 0.001983966943676311, "loss": 0.2707, "step": 6718 }, { "epoch": 0.011915115440978955, "grad_norm": 0.57421875, "learning_rate": 0.001983955757358278, "loss": 0.1967, "step": 6720 }, { "epoch": 0.01191866160628877, "grad_norm": 0.45703125, "learning_rate": 0.0019839445671743366, "loss": 0.3742, "step": 6722 }, { "epoch": 0.011922207771598584, "grad_norm": 0.7734375, "learning_rate": 0.001983933373124535, "loss": 0.2414, "step": 6724 }, { "epoch": 0.0119257539369084, "grad_norm": 0.4296875, "learning_rate": 0.001983922175208922, "loss": 0.2942, "step": 6726 }, { "epoch": 0.011929300102218215, "grad_norm": 0.734375, "learning_rate": 0.001983910973427547, "loss": 0.3007, "step": 6728 }, { "epoch": 0.01193284626752803, "grad_norm": 0.462890625, "learning_rate": 0.001983899767780459, "loss": 0.2479, "step": 6730 }, { "epoch": 0.011936392432837846, "grad_norm": 0.6484375, "learning_rate": 0.0019838885582677064, "loss": 0.2173, "step": 6732 }, { "epoch": 0.01193993859814766, "grad_norm": 0.671875, "learning_rate": 0.001983877344889339, "loss": 0.278, "step": 6734 }, { "epoch": 0.011943484763457475, "grad_norm": 0.330078125, "learning_rate": 0.0019838661276454055, "loss": 0.2704, "step": 6736 }, { "epoch": 0.011947030928767291, "grad_norm": 0.72265625, "learning_rate": 0.001983854906535955, "loss": 0.2654, "step": 6738 }, { "epoch": 0.011950577094077106, "grad_norm": 0.7109375, "learning_rate": 0.0019838436815610362, "loss": 0.2121, "step": 6740 }, { "epoch": 0.01195412325938692, "grad_norm": 2.09375, "learning_rate": 0.001983832452720699, "loss": 0.2342, "step": 6742 }, { "epoch": 0.011957669424696737, "grad_norm": 1.515625, "learning_rate": 0.0019838212200149917, "loss": 0.2736, "step": 6744 }, { "epoch": 0.011961215590006551, "grad_norm": 0.470703125, "learning_rate": 0.0019838099834439643, "loss": 0.2863, "step": 6746 }, { "epoch": 0.011964761755316366, "grad_norm": 0.4140625, "learning_rate": 0.0019837987430076647, "loss": 0.2547, "step": 6748 }, { "epoch": 0.011968307920626182, "grad_norm": 4.0, "learning_rate": 0.001983787498706143, "loss": 0.3155, "step": 6750 }, { "epoch": 0.011971854085935997, "grad_norm": 0.80859375, "learning_rate": 0.0019837762505394486, "loss": 0.2375, "step": 6752 }, { "epoch": 0.011975400251245813, "grad_norm": 1.0390625, "learning_rate": 0.0019837649985076297, "loss": 0.3251, "step": 6754 }, { "epoch": 0.011978946416555628, "grad_norm": 0.447265625, "learning_rate": 0.0019837537426107364, "loss": 0.273, "step": 6756 }, { "epoch": 0.011982492581865442, "grad_norm": 0.28125, "learning_rate": 0.001983742482848817, "loss": 0.4972, "step": 6758 }, { "epoch": 0.011986038747175258, "grad_norm": 0.314453125, "learning_rate": 0.0019837312192219217, "loss": 0.3103, "step": 6760 }, { "epoch": 0.011989584912485073, "grad_norm": 1.1171875, "learning_rate": 0.0019837199517301, "loss": 0.2812, "step": 6762 }, { "epoch": 0.011993131077794887, "grad_norm": 0.796875, "learning_rate": 0.0019837086803734, "loss": 0.2744, "step": 6764 }, { "epoch": 0.011996677243104704, "grad_norm": 0.494140625, "learning_rate": 0.0019836974051518712, "loss": 0.2333, "step": 6766 }, { "epoch": 0.012000223408414518, "grad_norm": 0.3984375, "learning_rate": 0.0019836861260655635, "loss": 0.3304, "step": 6768 }, { "epoch": 0.012003769573724333, "grad_norm": 0.2470703125, "learning_rate": 0.001983674843114526, "loss": 0.242, "step": 6770 }, { "epoch": 0.01200731573903415, "grad_norm": 0.359375, "learning_rate": 0.0019836635562988083, "loss": 0.1962, "step": 6772 }, { "epoch": 0.012010861904343964, "grad_norm": 0.5234375, "learning_rate": 0.001983652265618459, "loss": 0.2323, "step": 6774 }, { "epoch": 0.012014408069653778, "grad_norm": 0.31640625, "learning_rate": 0.0019836409710735285, "loss": 0.2398, "step": 6776 }, { "epoch": 0.012017954234963595, "grad_norm": 0.3828125, "learning_rate": 0.0019836296726640653, "loss": 0.2744, "step": 6778 }, { "epoch": 0.012021500400273409, "grad_norm": 0.83984375, "learning_rate": 0.001983618370390119, "loss": 0.2671, "step": 6780 }, { "epoch": 0.012025046565583224, "grad_norm": 1.3359375, "learning_rate": 0.00198360706425174, "loss": 0.5433, "step": 6782 }, { "epoch": 0.01202859273089304, "grad_norm": 3.03125, "learning_rate": 0.0019835957542489765, "loss": 0.4246, "step": 6784 }, { "epoch": 0.012032138896202854, "grad_norm": 0.5234375, "learning_rate": 0.001983584440381878, "loss": 0.3178, "step": 6786 }, { "epoch": 0.01203568506151267, "grad_norm": 0.3828125, "learning_rate": 0.0019835731226504954, "loss": 0.2733, "step": 6788 }, { "epoch": 0.012039231226822485, "grad_norm": 0.8515625, "learning_rate": 0.0019835618010548765, "loss": 0.3158, "step": 6790 }, { "epoch": 0.0120427773921323, "grad_norm": 0.609375, "learning_rate": 0.0019835504755950717, "loss": 0.2384, "step": 6792 }, { "epoch": 0.012046323557442116, "grad_norm": 0.5859375, "learning_rate": 0.00198353914627113, "loss": 0.2731, "step": 6794 }, { "epoch": 0.01204986972275193, "grad_norm": 0.2490234375, "learning_rate": 0.0019835278130831014, "loss": 0.1919, "step": 6796 }, { "epoch": 0.012053415888061745, "grad_norm": 0.251953125, "learning_rate": 0.0019835164760310356, "loss": 0.2322, "step": 6798 }, { "epoch": 0.012056962053371562, "grad_norm": 0.4140625, "learning_rate": 0.001983505135114982, "loss": 0.2973, "step": 6800 }, { "epoch": 0.012060508218681376, "grad_norm": 0.345703125, "learning_rate": 0.00198349379033499, "loss": 0.2675, "step": 6802 }, { "epoch": 0.01206405438399119, "grad_norm": 0.765625, "learning_rate": 0.00198348244169111, "loss": 0.2733, "step": 6804 }, { "epoch": 0.012067600549301007, "grad_norm": 0.33984375, "learning_rate": 0.00198347108918339, "loss": 0.2579, "step": 6806 }, { "epoch": 0.012071146714610822, "grad_norm": 0.32421875, "learning_rate": 0.001983459732811881, "loss": 0.316, "step": 6808 }, { "epoch": 0.012074692879920636, "grad_norm": 0.3046875, "learning_rate": 0.0019834483725766324, "loss": 0.2426, "step": 6810 }, { "epoch": 0.012078239045230452, "grad_norm": 0.609375, "learning_rate": 0.0019834370084776936, "loss": 0.3107, "step": 6812 }, { "epoch": 0.012081785210540267, "grad_norm": 0.462890625, "learning_rate": 0.001983425640515115, "loss": 0.1981, "step": 6814 }, { "epoch": 0.012085331375850081, "grad_norm": 0.6328125, "learning_rate": 0.001983414268688945, "loss": 0.27, "step": 6816 }, { "epoch": 0.012088877541159898, "grad_norm": 0.71875, "learning_rate": 0.0019834028929992344, "loss": 0.2811, "step": 6818 }, { "epoch": 0.012092423706469712, "grad_norm": 0.89453125, "learning_rate": 0.001983391513446033, "loss": 0.2405, "step": 6820 }, { "epoch": 0.012095969871779529, "grad_norm": 0.44921875, "learning_rate": 0.00198338013002939, "loss": 0.2651, "step": 6822 }, { "epoch": 0.012099516037089343, "grad_norm": 0.52734375, "learning_rate": 0.0019833687427493556, "loss": 0.3148, "step": 6824 }, { "epoch": 0.012103062202399158, "grad_norm": 0.87109375, "learning_rate": 0.0019833573516059794, "loss": 0.264, "step": 6826 }, { "epoch": 0.012106608367708974, "grad_norm": 0.3359375, "learning_rate": 0.001983345956599311, "loss": 0.2805, "step": 6828 }, { "epoch": 0.012110154533018789, "grad_norm": 1.21875, "learning_rate": 0.0019833345577294006, "loss": 0.3786, "step": 6830 }, { "epoch": 0.012113700698328603, "grad_norm": 0.2158203125, "learning_rate": 0.001983323154996298, "loss": 0.2356, "step": 6832 }, { "epoch": 0.01211724686363842, "grad_norm": 0.357421875, "learning_rate": 0.0019833117484000535, "loss": 0.2613, "step": 6834 }, { "epoch": 0.012120793028948234, "grad_norm": 0.4140625, "learning_rate": 0.001983300337940716, "loss": 0.2556, "step": 6836 }, { "epoch": 0.012124339194258048, "grad_norm": 6.59375, "learning_rate": 0.0019832889236183356, "loss": 0.3318, "step": 6838 }, { "epoch": 0.012127885359567865, "grad_norm": 0.9765625, "learning_rate": 0.001983277505432963, "loss": 0.1884, "step": 6840 }, { "epoch": 0.01213143152487768, "grad_norm": 0.1826171875, "learning_rate": 0.0019832660833846473, "loss": 0.2034, "step": 6842 }, { "epoch": 0.012134977690187494, "grad_norm": 0.58203125, "learning_rate": 0.0019832546574734397, "loss": 0.2472, "step": 6844 }, { "epoch": 0.01213852385549731, "grad_norm": 0.2421875, "learning_rate": 0.0019832432276993884, "loss": 0.2302, "step": 6846 }, { "epoch": 0.012142070020807125, "grad_norm": 0.392578125, "learning_rate": 0.0019832317940625447, "loss": 0.2369, "step": 6848 }, { "epoch": 0.01214561618611694, "grad_norm": 0.484375, "learning_rate": 0.001983220356562958, "loss": 0.3195, "step": 6850 }, { "epoch": 0.012149162351426756, "grad_norm": 0.380859375, "learning_rate": 0.0019832089152006785, "loss": 0.2562, "step": 6852 }, { "epoch": 0.01215270851673657, "grad_norm": 0.58984375, "learning_rate": 0.001983197469975756, "loss": 0.2764, "step": 6854 }, { "epoch": 0.012156254682046386, "grad_norm": 0.337890625, "learning_rate": 0.001983186020888241, "loss": 0.2632, "step": 6856 }, { "epoch": 0.012159800847356201, "grad_norm": 0.46875, "learning_rate": 0.0019831745679381833, "loss": 0.2197, "step": 6858 }, { "epoch": 0.012163347012666016, "grad_norm": 1.421875, "learning_rate": 0.001983163111125633, "loss": 0.2786, "step": 6860 }, { "epoch": 0.012166893177975832, "grad_norm": 0.68359375, "learning_rate": 0.0019831516504506407, "loss": 0.2758, "step": 6862 }, { "epoch": 0.012170439343285646, "grad_norm": 0.56640625, "learning_rate": 0.0019831401859132553, "loss": 0.2994, "step": 6864 }, { "epoch": 0.012173985508595461, "grad_norm": 0.353515625, "learning_rate": 0.0019831287175135284, "loss": 0.2428, "step": 6866 }, { "epoch": 0.012177531673905277, "grad_norm": 0.55859375, "learning_rate": 0.001983117245251509, "loss": 0.2468, "step": 6868 }, { "epoch": 0.012181077839215092, "grad_norm": 0.439453125, "learning_rate": 0.001983105769127248, "loss": 0.2521, "step": 6870 }, { "epoch": 0.012184624004524906, "grad_norm": 0.251953125, "learning_rate": 0.001983094289140796, "loss": 0.2257, "step": 6872 }, { "epoch": 0.012188170169834723, "grad_norm": 0.93359375, "learning_rate": 0.0019830828052922016, "loss": 0.224, "step": 6874 }, { "epoch": 0.012191716335144537, "grad_norm": 0.70703125, "learning_rate": 0.001983071317581516, "loss": 0.2698, "step": 6876 }, { "epoch": 0.012195262500454352, "grad_norm": 0.38671875, "learning_rate": 0.0019830598260087897, "loss": 0.3066, "step": 6878 }, { "epoch": 0.012198808665764168, "grad_norm": 0.3828125, "learning_rate": 0.0019830483305740727, "loss": 0.2594, "step": 6880 }, { "epoch": 0.012202354831073983, "grad_norm": 0.66796875, "learning_rate": 0.001983036831277415, "loss": 0.2496, "step": 6882 }, { "epoch": 0.012205900996383797, "grad_norm": 0.2236328125, "learning_rate": 0.0019830253281188674, "loss": 0.1695, "step": 6884 }, { "epoch": 0.012209447161693613, "grad_norm": 0.25, "learning_rate": 0.0019830138210984796, "loss": 0.2206, "step": 6886 }, { "epoch": 0.012212993327003428, "grad_norm": 0.71875, "learning_rate": 0.0019830023102163025, "loss": 0.277, "step": 6888 }, { "epoch": 0.012216539492313244, "grad_norm": 2.328125, "learning_rate": 0.0019829907954723863, "loss": 0.3232, "step": 6890 }, { "epoch": 0.012220085657623059, "grad_norm": 0.5703125, "learning_rate": 0.001982979276866781, "loss": 0.1966, "step": 6892 }, { "epoch": 0.012223631822932873, "grad_norm": 0.30859375, "learning_rate": 0.0019829677543995376, "loss": 0.2742, "step": 6894 }, { "epoch": 0.01222717798824269, "grad_norm": 0.369140625, "learning_rate": 0.0019829562280707057, "loss": 0.3344, "step": 6896 }, { "epoch": 0.012230724153552504, "grad_norm": 0.3359375, "learning_rate": 0.0019829446978803364, "loss": 0.2517, "step": 6898 }, { "epoch": 0.012234270318862319, "grad_norm": 11.25, "learning_rate": 0.00198293316382848, "loss": 0.3401, "step": 6900 }, { "epoch": 0.012237816484172135, "grad_norm": 0.318359375, "learning_rate": 0.0019829216259151863, "loss": 0.1887, "step": 6902 }, { "epoch": 0.01224136264948195, "grad_norm": 0.37109375, "learning_rate": 0.0019829100841405067, "loss": 0.1818, "step": 6904 }, { "epoch": 0.012244908814791764, "grad_norm": 4.0, "learning_rate": 0.0019828985385044913, "loss": 0.3568, "step": 6906 }, { "epoch": 0.01224845498010158, "grad_norm": 0.21875, "learning_rate": 0.00198288698900719, "loss": 0.3633, "step": 6908 }, { "epoch": 0.012252001145411395, "grad_norm": 1.4140625, "learning_rate": 0.0019828754356486546, "loss": 0.4123, "step": 6910 }, { "epoch": 0.01225554731072121, "grad_norm": 0.2041015625, "learning_rate": 0.0019828638784289347, "loss": 0.2238, "step": 6912 }, { "epoch": 0.012259093476031026, "grad_norm": 1.0546875, "learning_rate": 0.0019828523173480808, "loss": 0.2899, "step": 6914 }, { "epoch": 0.01226263964134084, "grad_norm": 0.427734375, "learning_rate": 0.0019828407524061435, "loss": 0.2454, "step": 6916 }, { "epoch": 0.012266185806650655, "grad_norm": 0.49609375, "learning_rate": 0.0019828291836031737, "loss": 0.2928, "step": 6918 }, { "epoch": 0.012269731971960471, "grad_norm": 0.25390625, "learning_rate": 0.001982817610939222, "loss": 0.2622, "step": 6920 }, { "epoch": 0.012273278137270286, "grad_norm": 0.67578125, "learning_rate": 0.0019828060344143387, "loss": 0.3946, "step": 6922 }, { "epoch": 0.012276824302580102, "grad_norm": 0.56640625, "learning_rate": 0.0019827944540285747, "loss": 0.215, "step": 6924 }, { "epoch": 0.012280370467889917, "grad_norm": 0.87109375, "learning_rate": 0.0019827828697819806, "loss": 0.4017, "step": 6926 }, { "epoch": 0.012283916633199731, "grad_norm": 0.765625, "learning_rate": 0.0019827712816746067, "loss": 0.3001, "step": 6928 }, { "epoch": 0.012287462798509547, "grad_norm": 0.314453125, "learning_rate": 0.0019827596897065048, "loss": 0.2846, "step": 6930 }, { "epoch": 0.012291008963819362, "grad_norm": 0.83203125, "learning_rate": 0.0019827480938777236, "loss": 0.3703, "step": 6932 }, { "epoch": 0.012294555129129177, "grad_norm": 0.64453125, "learning_rate": 0.001982736494188316, "loss": 0.2309, "step": 6934 }, { "epoch": 0.012298101294438993, "grad_norm": 1.7109375, "learning_rate": 0.0019827248906383317, "loss": 0.3196, "step": 6936 }, { "epoch": 0.012301647459748807, "grad_norm": 0.2177734375, "learning_rate": 0.0019827132832278206, "loss": 0.291, "step": 6938 }, { "epoch": 0.012305193625058622, "grad_norm": 0.322265625, "learning_rate": 0.001982701671956835, "loss": 0.2409, "step": 6940 }, { "epoch": 0.012308739790368438, "grad_norm": 0.55078125, "learning_rate": 0.001982690056825425, "loss": 0.2597, "step": 6942 }, { "epoch": 0.012312285955678253, "grad_norm": 0.27734375, "learning_rate": 0.001982678437833641, "loss": 0.2432, "step": 6944 }, { "epoch": 0.012315832120988067, "grad_norm": 0.361328125, "learning_rate": 0.0019826668149815346, "loss": 0.2968, "step": 6946 }, { "epoch": 0.012319378286297884, "grad_norm": 0.380859375, "learning_rate": 0.001982655188269156, "loss": 0.2664, "step": 6948 }, { "epoch": 0.012322924451607698, "grad_norm": 0.27734375, "learning_rate": 0.001982643557696557, "loss": 0.2904, "step": 6950 }, { "epoch": 0.012326470616917513, "grad_norm": 0.2255859375, "learning_rate": 0.0019826319232637874, "loss": 0.2259, "step": 6952 }, { "epoch": 0.012330016782227329, "grad_norm": 0.365234375, "learning_rate": 0.001982620284970898, "loss": 0.3693, "step": 6954 }, { "epoch": 0.012333562947537144, "grad_norm": 0.6015625, "learning_rate": 0.0019826086428179407, "loss": 0.1981, "step": 6956 }, { "epoch": 0.01233710911284696, "grad_norm": 1.9375, "learning_rate": 0.001982596996804966, "loss": 0.3156, "step": 6958 }, { "epoch": 0.012340655278156774, "grad_norm": 0.294921875, "learning_rate": 0.001982585346932024, "loss": 0.2237, "step": 6960 }, { "epoch": 0.012344201443466589, "grad_norm": 0.4609375, "learning_rate": 0.001982573693199167, "loss": 0.2888, "step": 6962 }, { "epoch": 0.012347747608776405, "grad_norm": 0.3515625, "learning_rate": 0.001982562035606445, "loss": 0.2527, "step": 6964 }, { "epoch": 0.01235129377408622, "grad_norm": 0.287109375, "learning_rate": 0.0019825503741539097, "loss": 0.2258, "step": 6966 }, { "epoch": 0.012354839939396034, "grad_norm": 0.29296875, "learning_rate": 0.0019825387088416115, "loss": 0.2394, "step": 6968 }, { "epoch": 0.01235838610470585, "grad_norm": 0.3828125, "learning_rate": 0.0019825270396696014, "loss": 0.2348, "step": 6970 }, { "epoch": 0.012361932270015665, "grad_norm": 0.48828125, "learning_rate": 0.0019825153666379303, "loss": 0.2296, "step": 6972 }, { "epoch": 0.01236547843532548, "grad_norm": 0.32421875, "learning_rate": 0.0019825036897466505, "loss": 0.1985, "step": 6974 }, { "epoch": 0.012369024600635296, "grad_norm": 0.3203125, "learning_rate": 0.0019824920089958117, "loss": 0.269, "step": 6976 }, { "epoch": 0.01237257076594511, "grad_norm": 0.224609375, "learning_rate": 0.0019824803243854655, "loss": 0.2197, "step": 6978 }, { "epoch": 0.012376116931254925, "grad_norm": 2.546875, "learning_rate": 0.0019824686359156633, "loss": 0.4333, "step": 6980 }, { "epoch": 0.012379663096564741, "grad_norm": 1.1484375, "learning_rate": 0.0019824569435864556, "loss": 0.2689, "step": 6982 }, { "epoch": 0.012383209261874556, "grad_norm": 0.1953125, "learning_rate": 0.001982445247397894, "loss": 0.2146, "step": 6984 }, { "epoch": 0.01238675542718437, "grad_norm": 0.97265625, "learning_rate": 0.001982433547350029, "loss": 0.1743, "step": 6986 }, { "epoch": 0.012390301592494187, "grad_norm": 0.49609375, "learning_rate": 0.0019824218434429126, "loss": 0.2409, "step": 6988 }, { "epoch": 0.012393847757804001, "grad_norm": 0.1650390625, "learning_rate": 0.0019824101356765954, "loss": 0.2394, "step": 6990 }, { "epoch": 0.012397393923113818, "grad_norm": 0.3828125, "learning_rate": 0.001982398424051129, "loss": 0.2411, "step": 6992 }, { "epoch": 0.012400940088423632, "grad_norm": 0.369140625, "learning_rate": 0.001982386708566565, "loss": 0.2816, "step": 6994 }, { "epoch": 0.012404486253733447, "grad_norm": 0.2421875, "learning_rate": 0.0019823749892229534, "loss": 0.2394, "step": 6996 }, { "epoch": 0.012408032419043263, "grad_norm": 0.4375, "learning_rate": 0.001982363266020346, "loss": 0.2617, "step": 6998 }, { "epoch": 0.012411578584353078, "grad_norm": 0.6171875, "learning_rate": 0.0019823515389587945, "loss": 0.2929, "step": 7000 }, { "epoch": 0.012415124749662892, "grad_norm": 0.65625, "learning_rate": 0.0019823398080383503, "loss": 0.4538, "step": 7002 }, { "epoch": 0.012418670914972708, "grad_norm": 8.125, "learning_rate": 0.001982328073259064, "loss": 0.2788, "step": 7004 }, { "epoch": 0.012422217080282523, "grad_norm": 1.9296875, "learning_rate": 0.0019823163346209868, "loss": 0.3532, "step": 7006 }, { "epoch": 0.012425763245592338, "grad_norm": 0.6875, "learning_rate": 0.0019823045921241707, "loss": 0.2222, "step": 7008 }, { "epoch": 0.012429309410902154, "grad_norm": 0.2138671875, "learning_rate": 0.001982292845768667, "loss": 0.2737, "step": 7010 }, { "epoch": 0.012432855576211968, "grad_norm": 0.2734375, "learning_rate": 0.0019822810955545268, "loss": 0.3135, "step": 7012 }, { "epoch": 0.012436401741521783, "grad_norm": 0.365234375, "learning_rate": 0.0019822693414818016, "loss": 0.2874, "step": 7014 }, { "epoch": 0.0124399479068316, "grad_norm": 0.34375, "learning_rate": 0.001982257583550543, "loss": 0.2411, "step": 7016 }, { "epoch": 0.012443494072141414, "grad_norm": 0.26953125, "learning_rate": 0.001982245821760802, "loss": 0.2644, "step": 7018 }, { "epoch": 0.012447040237451228, "grad_norm": 0.72265625, "learning_rate": 0.00198223405611263, "loss": 0.2268, "step": 7020 }, { "epoch": 0.012450586402761045, "grad_norm": 0.6640625, "learning_rate": 0.0019822222866060788, "loss": 0.254, "step": 7022 }, { "epoch": 0.01245413256807086, "grad_norm": 0.251953125, "learning_rate": 0.0019822105132412, "loss": 0.287, "step": 7024 }, { "epoch": 0.012457678733380675, "grad_norm": 1.8515625, "learning_rate": 0.001982198736018045, "loss": 0.2902, "step": 7026 }, { "epoch": 0.01246122489869049, "grad_norm": 0.71484375, "learning_rate": 0.001982186954936665, "loss": 0.5944, "step": 7028 }, { "epoch": 0.012464771064000305, "grad_norm": 2.28125, "learning_rate": 0.001982175169997111, "loss": 0.4291, "step": 7030 }, { "epoch": 0.01246831722931012, "grad_norm": 2.328125, "learning_rate": 0.001982163381199436, "loss": 0.3188, "step": 7032 }, { "epoch": 0.012471863394619935, "grad_norm": 0.31640625, "learning_rate": 0.001982151588543691, "loss": 0.2657, "step": 7034 }, { "epoch": 0.01247540955992975, "grad_norm": 0.52734375, "learning_rate": 0.001982139792029927, "loss": 0.262, "step": 7036 }, { "epoch": 0.012478955725239566, "grad_norm": 0.47265625, "learning_rate": 0.001982127991658196, "loss": 0.2731, "step": 7038 }, { "epoch": 0.01248250189054938, "grad_norm": 0.23046875, "learning_rate": 0.0019821161874285496, "loss": 0.208, "step": 7040 }, { "epoch": 0.012486048055859195, "grad_norm": 0.56640625, "learning_rate": 0.001982104379341039, "loss": 0.3549, "step": 7042 }, { "epoch": 0.012489594221169012, "grad_norm": 0.4921875, "learning_rate": 0.0019820925673957168, "loss": 0.2285, "step": 7044 }, { "epoch": 0.012493140386478826, "grad_norm": 0.306640625, "learning_rate": 0.0019820807515926334, "loss": 0.2439, "step": 7046 }, { "epoch": 0.01249668655178864, "grad_norm": 0.419921875, "learning_rate": 0.0019820689319318416, "loss": 0.2338, "step": 7048 }, { "epoch": 0.012500232717098457, "grad_norm": 0.44140625, "learning_rate": 0.001982057108413393, "loss": 0.4401, "step": 7050 }, { "epoch": 0.012503778882408272, "grad_norm": 0.484375, "learning_rate": 0.001982045281037339, "loss": 0.2782, "step": 7052 }, { "epoch": 0.012507325047718086, "grad_norm": 0.5390625, "learning_rate": 0.0019820334498037305, "loss": 0.2343, "step": 7054 }, { "epoch": 0.012510871213027902, "grad_norm": 0.408203125, "learning_rate": 0.0019820216147126207, "loss": 0.2572, "step": 7056 }, { "epoch": 0.012514417378337717, "grad_norm": 0.302734375, "learning_rate": 0.0019820097757640605, "loss": 0.2594, "step": 7058 }, { "epoch": 0.012517963543647533, "grad_norm": 0.5859375, "learning_rate": 0.0019819979329581015, "loss": 0.2041, "step": 7060 }, { "epoch": 0.012521509708957348, "grad_norm": 0.369140625, "learning_rate": 0.0019819860862947966, "loss": 0.2931, "step": 7062 }, { "epoch": 0.012525055874267162, "grad_norm": 0.41015625, "learning_rate": 0.0019819742357741962, "loss": 0.2811, "step": 7064 }, { "epoch": 0.012528602039576979, "grad_norm": 1.0546875, "learning_rate": 0.001981962381396353, "loss": 0.3539, "step": 7066 }, { "epoch": 0.012532148204886793, "grad_norm": 0.2734375, "learning_rate": 0.0019819505231613186, "loss": 0.2371, "step": 7068 }, { "epoch": 0.012535694370196608, "grad_norm": 0.267578125, "learning_rate": 0.001981938661069145, "loss": 0.2808, "step": 7070 }, { "epoch": 0.012539240535506424, "grad_norm": 0.5390625, "learning_rate": 0.001981926795119884, "loss": 0.4738, "step": 7072 }, { "epoch": 0.012542786700816239, "grad_norm": 0.2734375, "learning_rate": 0.001981914925313588, "loss": 0.3067, "step": 7074 }, { "epoch": 0.012546332866126053, "grad_norm": 0.734375, "learning_rate": 0.001981903051650308, "loss": 0.4865, "step": 7076 }, { "epoch": 0.01254987903143587, "grad_norm": 0.32421875, "learning_rate": 0.001981891174130096, "loss": 0.2807, "step": 7078 }, { "epoch": 0.012553425196745684, "grad_norm": 0.51171875, "learning_rate": 0.0019818792927530043, "loss": 0.2514, "step": 7080 }, { "epoch": 0.012556971362055499, "grad_norm": 0.390625, "learning_rate": 0.0019818674075190853, "loss": 0.2599, "step": 7082 }, { "epoch": 0.012560517527365315, "grad_norm": 0.490234375, "learning_rate": 0.0019818555184283903, "loss": 0.2465, "step": 7084 }, { "epoch": 0.01256406369267513, "grad_norm": 0.99609375, "learning_rate": 0.0019818436254809713, "loss": 0.293, "step": 7086 }, { "epoch": 0.012567609857984944, "grad_norm": 0.3828125, "learning_rate": 0.0019818317286768804, "loss": 0.3002, "step": 7088 }, { "epoch": 0.01257115602329476, "grad_norm": 0.28515625, "learning_rate": 0.0019818198280161705, "loss": 0.3467, "step": 7090 }, { "epoch": 0.012574702188604575, "grad_norm": 0.353515625, "learning_rate": 0.0019818079234988923, "loss": 0.2387, "step": 7092 }, { "epoch": 0.012578248353914391, "grad_norm": 13.4375, "learning_rate": 0.0019817960151250983, "loss": 0.445, "step": 7094 }, { "epoch": 0.012581794519224206, "grad_norm": 0.328125, "learning_rate": 0.0019817841028948414, "loss": 0.2352, "step": 7096 }, { "epoch": 0.01258534068453402, "grad_norm": 0.2314453125, "learning_rate": 0.0019817721868081724, "loss": 0.1747, "step": 7098 }, { "epoch": 0.012588886849843836, "grad_norm": 0.263671875, "learning_rate": 0.001981760266865144, "loss": 0.2832, "step": 7100 }, { "epoch": 0.012592433015153651, "grad_norm": 0.30078125, "learning_rate": 0.001981748343065809, "loss": 0.2656, "step": 7102 }, { "epoch": 0.012595979180463466, "grad_norm": 0.69140625, "learning_rate": 0.0019817364154102184, "loss": 0.2461, "step": 7104 }, { "epoch": 0.012599525345773282, "grad_norm": 0.54296875, "learning_rate": 0.001981724483898425, "loss": 0.2458, "step": 7106 }, { "epoch": 0.012603071511083096, "grad_norm": 3.265625, "learning_rate": 0.001981712548530481, "loss": 0.5001, "step": 7108 }, { "epoch": 0.012606617676392911, "grad_norm": 0.470703125, "learning_rate": 0.0019817006093064385, "loss": 0.3432, "step": 7110 }, { "epoch": 0.012610163841702727, "grad_norm": 1.265625, "learning_rate": 0.0019816886662263494, "loss": 0.2609, "step": 7112 }, { "epoch": 0.012613710007012542, "grad_norm": 0.30078125, "learning_rate": 0.001981676719290267, "loss": 0.2565, "step": 7114 }, { "epoch": 0.012617256172322356, "grad_norm": 0.66796875, "learning_rate": 0.001981664768498242, "loss": 0.2765, "step": 7116 }, { "epoch": 0.012620802337632173, "grad_norm": 0.5234375, "learning_rate": 0.0019816528138503274, "loss": 0.2701, "step": 7118 }, { "epoch": 0.012624348502941987, "grad_norm": 0.6875, "learning_rate": 0.001981640855346576, "loss": 0.2449, "step": 7120 }, { "epoch": 0.012627894668251802, "grad_norm": 0.205078125, "learning_rate": 0.0019816288929870394, "loss": 0.2288, "step": 7122 }, { "epoch": 0.012631440833561618, "grad_norm": 0.52734375, "learning_rate": 0.0019816169267717703, "loss": 0.2604, "step": 7124 }, { "epoch": 0.012634986998871433, "grad_norm": 0.625, "learning_rate": 0.0019816049567008202, "loss": 0.2875, "step": 7126 }, { "epoch": 0.012638533164181249, "grad_norm": 0.609375, "learning_rate": 0.0019815929827742425, "loss": 0.5593, "step": 7128 }, { "epoch": 0.012642079329491063, "grad_norm": 0.240234375, "learning_rate": 0.0019815810049920897, "loss": 0.1914, "step": 7130 }, { "epoch": 0.012645625494800878, "grad_norm": 0.25, "learning_rate": 0.0019815690233544133, "loss": 0.2331, "step": 7132 }, { "epoch": 0.012649171660110694, "grad_norm": 0.380859375, "learning_rate": 0.001981557037861266, "loss": 0.3437, "step": 7134 }, { "epoch": 0.012652717825420509, "grad_norm": 0.32421875, "learning_rate": 0.0019815450485127, "loss": 0.2202, "step": 7136 }, { "epoch": 0.012656263990730323, "grad_norm": 0.50390625, "learning_rate": 0.0019815330553087686, "loss": 0.2587, "step": 7138 }, { "epoch": 0.01265981015604014, "grad_norm": 0.52734375, "learning_rate": 0.0019815210582495232, "loss": 0.2261, "step": 7140 }, { "epoch": 0.012663356321349954, "grad_norm": 0.87890625, "learning_rate": 0.001981509057335017, "loss": 0.383, "step": 7142 }, { "epoch": 0.012666902486659769, "grad_norm": 0.58984375, "learning_rate": 0.001981497052565302, "loss": 0.2801, "step": 7144 }, { "epoch": 0.012670448651969585, "grad_norm": 0.40625, "learning_rate": 0.001981485043940431, "loss": 0.2346, "step": 7146 }, { "epoch": 0.0126739948172794, "grad_norm": 0.85546875, "learning_rate": 0.0019814730314604567, "loss": 0.3033, "step": 7148 }, { "epoch": 0.012677540982589214, "grad_norm": 0.43359375, "learning_rate": 0.001981461015125431, "loss": 0.2223, "step": 7150 }, { "epoch": 0.01268108714789903, "grad_norm": 0.7734375, "learning_rate": 0.0019814489949354073, "loss": 0.2798, "step": 7152 }, { "epoch": 0.012684633313208845, "grad_norm": 0.421875, "learning_rate": 0.0019814369708904375, "loss": 0.2874, "step": 7154 }, { "epoch": 0.01268817947851866, "grad_norm": 0.2275390625, "learning_rate": 0.0019814249429905744, "loss": 0.2074, "step": 7156 }, { "epoch": 0.012691725643828476, "grad_norm": 0.349609375, "learning_rate": 0.0019814129112358703, "loss": 0.3419, "step": 7158 }, { "epoch": 0.01269527180913829, "grad_norm": 0.2734375, "learning_rate": 0.001981400875626378, "loss": 0.2377, "step": 7160 }, { "epoch": 0.012698817974448107, "grad_norm": 0.40234375, "learning_rate": 0.001981388836162151, "loss": 0.256, "step": 7162 }, { "epoch": 0.012702364139757921, "grad_norm": 0.6484375, "learning_rate": 0.0019813767928432407, "loss": 0.2384, "step": 7164 }, { "epoch": 0.012705910305067736, "grad_norm": 0.486328125, "learning_rate": 0.0019813647456697002, "loss": 0.2071, "step": 7166 }, { "epoch": 0.012709456470377552, "grad_norm": 0.55859375, "learning_rate": 0.0019813526946415826, "loss": 0.2576, "step": 7168 }, { "epoch": 0.012713002635687367, "grad_norm": 2.234375, "learning_rate": 0.00198134063975894, "loss": 0.5217, "step": 7170 }, { "epoch": 0.012716548800997181, "grad_norm": 0.28125, "learning_rate": 0.0019813285810218254, "loss": 0.2629, "step": 7172 }, { "epoch": 0.012720094966306997, "grad_norm": 0.337890625, "learning_rate": 0.0019813165184302916, "loss": 0.2286, "step": 7174 }, { "epoch": 0.012723641131616812, "grad_norm": 1.375, "learning_rate": 0.0019813044519843915, "loss": 0.2999, "step": 7176 }, { "epoch": 0.012727187296926627, "grad_norm": 0.6875, "learning_rate": 0.0019812923816841773, "loss": 0.2969, "step": 7178 }, { "epoch": 0.012730733462236443, "grad_norm": 0.416015625, "learning_rate": 0.001981280307529702, "loss": 0.2352, "step": 7180 }, { "epoch": 0.012734279627546257, "grad_norm": 0.3984375, "learning_rate": 0.001981268229521019, "loss": 0.329, "step": 7182 }, { "epoch": 0.012737825792856072, "grad_norm": 0.369140625, "learning_rate": 0.0019812561476581806, "loss": 0.229, "step": 7184 }, { "epoch": 0.012741371958165888, "grad_norm": 1.1328125, "learning_rate": 0.001981244061941239, "loss": 0.2523, "step": 7186 }, { "epoch": 0.012744918123475703, "grad_norm": 2.265625, "learning_rate": 0.001981231972370249, "loss": 0.3817, "step": 7188 }, { "epoch": 0.012748464288785517, "grad_norm": 0.4140625, "learning_rate": 0.0019812198789452614, "loss": 0.2409, "step": 7190 }, { "epoch": 0.012752010454095334, "grad_norm": 0.6796875, "learning_rate": 0.00198120778166633, "loss": 0.3105, "step": 7192 }, { "epoch": 0.012755556619405148, "grad_norm": 0.251953125, "learning_rate": 0.0019811956805335074, "loss": 0.268, "step": 7194 }, { "epoch": 0.012759102784714964, "grad_norm": 0.40625, "learning_rate": 0.0019811835755468472, "loss": 0.2836, "step": 7196 }, { "epoch": 0.012762648950024779, "grad_norm": 0.310546875, "learning_rate": 0.0019811714667064017, "loss": 0.2399, "step": 7198 }, { "epoch": 0.012766195115334594, "grad_norm": 0.33203125, "learning_rate": 0.001981159354012224, "loss": 0.2313, "step": 7200 }, { "epoch": 0.01276974128064441, "grad_norm": 0.3046875, "learning_rate": 0.0019811472374643676, "loss": 0.3612, "step": 7202 }, { "epoch": 0.012773287445954224, "grad_norm": 0.466796875, "learning_rate": 0.0019811351170628847, "loss": 0.2715, "step": 7204 }, { "epoch": 0.012776833611264039, "grad_norm": 0.57421875, "learning_rate": 0.0019811229928078287, "loss": 0.2722, "step": 7206 }, { "epoch": 0.012780379776573855, "grad_norm": 0.3984375, "learning_rate": 0.001981110864699252, "loss": 0.249, "step": 7208 }, { "epoch": 0.01278392594188367, "grad_norm": 0.255859375, "learning_rate": 0.0019810987327372087, "loss": 0.3748, "step": 7210 }, { "epoch": 0.012787472107193484, "grad_norm": 1.5703125, "learning_rate": 0.001981086596921751, "loss": 0.3629, "step": 7212 }, { "epoch": 0.0127910182725033, "grad_norm": 1.4765625, "learning_rate": 0.001981074457252933, "loss": 0.3046, "step": 7214 }, { "epoch": 0.012794564437813115, "grad_norm": 0.49609375, "learning_rate": 0.0019810623137308065, "loss": 0.2893, "step": 7216 }, { "epoch": 0.01279811060312293, "grad_norm": 0.2578125, "learning_rate": 0.001981050166355426, "loss": 0.1819, "step": 7218 }, { "epoch": 0.012801656768432746, "grad_norm": 0.384765625, "learning_rate": 0.001981038015126843, "loss": 0.2163, "step": 7220 }, { "epoch": 0.01280520293374256, "grad_norm": 0.68359375, "learning_rate": 0.0019810258600451115, "loss": 0.2835, "step": 7222 }, { "epoch": 0.012808749099052375, "grad_norm": 0.306640625, "learning_rate": 0.001981013701110285, "loss": 0.2571, "step": 7224 }, { "epoch": 0.012812295264362191, "grad_norm": 0.53125, "learning_rate": 0.001981001538322416, "loss": 0.2347, "step": 7226 }, { "epoch": 0.012815841429672006, "grad_norm": 0.333984375, "learning_rate": 0.001980989371681558, "loss": 0.2317, "step": 7228 }, { "epoch": 0.012819387594981822, "grad_norm": 0.4140625, "learning_rate": 0.001980977201187765, "loss": 0.2647, "step": 7230 }, { "epoch": 0.012822933760291637, "grad_norm": 0.400390625, "learning_rate": 0.0019809650268410887, "loss": 0.3027, "step": 7232 }, { "epoch": 0.012826479925601451, "grad_norm": 0.30078125, "learning_rate": 0.0019809528486415835, "loss": 0.2051, "step": 7234 }, { "epoch": 0.012830026090911268, "grad_norm": 0.3125, "learning_rate": 0.001980940666589302, "loss": 0.326, "step": 7236 }, { "epoch": 0.012833572256221082, "grad_norm": 0.375, "learning_rate": 0.0019809284806842978, "loss": 0.2098, "step": 7238 }, { "epoch": 0.012837118421530897, "grad_norm": 0.1904296875, "learning_rate": 0.001980916290926624, "loss": 0.2324, "step": 7240 }, { "epoch": 0.012840664586840713, "grad_norm": 0.376953125, "learning_rate": 0.001980904097316334, "loss": 0.2191, "step": 7242 }, { "epoch": 0.012844210752150528, "grad_norm": 0.3515625, "learning_rate": 0.001980891899853482, "loss": 0.2297, "step": 7244 }, { "epoch": 0.012847756917460342, "grad_norm": 0.7734375, "learning_rate": 0.0019808796985381193, "loss": 0.2185, "step": 7246 }, { "epoch": 0.012851303082770158, "grad_norm": 0.7578125, "learning_rate": 0.001980867493370301, "loss": 0.247, "step": 7248 }, { "epoch": 0.012854849248079973, "grad_norm": 0.38671875, "learning_rate": 0.00198085528435008, "loss": 0.2162, "step": 7250 }, { "epoch": 0.012858395413389788, "grad_norm": 0.5625, "learning_rate": 0.0019808430714775096, "loss": 0.2796, "step": 7252 }, { "epoch": 0.012861941578699604, "grad_norm": 1.3359375, "learning_rate": 0.0019808308547526435, "loss": 0.2255, "step": 7254 }, { "epoch": 0.012865487744009418, "grad_norm": 0.1826171875, "learning_rate": 0.0019808186341755346, "loss": 0.2234, "step": 7256 }, { "epoch": 0.012869033909319233, "grad_norm": 0.921875, "learning_rate": 0.001980806409746237, "loss": 0.3388, "step": 7258 }, { "epoch": 0.01287258007462905, "grad_norm": 0.341796875, "learning_rate": 0.0019807941814648034, "loss": 0.3057, "step": 7260 }, { "epoch": 0.012876126239938864, "grad_norm": 0.67578125, "learning_rate": 0.0019807819493312877, "loss": 0.292, "step": 7262 }, { "epoch": 0.01287967240524868, "grad_norm": 0.298828125, "learning_rate": 0.0019807697133457434, "loss": 0.2776, "step": 7264 }, { "epoch": 0.012883218570558495, "grad_norm": 2.03125, "learning_rate": 0.001980757473508224, "loss": 0.3523, "step": 7266 }, { "epoch": 0.01288676473586831, "grad_norm": 0.1513671875, "learning_rate": 0.0019807452298187833, "loss": 0.2111, "step": 7268 }, { "epoch": 0.012890310901178125, "grad_norm": 0.6484375, "learning_rate": 0.0019807329822774744, "loss": 0.3608, "step": 7270 }, { "epoch": 0.01289385706648794, "grad_norm": 0.302734375, "learning_rate": 0.0019807207308843505, "loss": 0.5, "step": 7272 }, { "epoch": 0.012897403231797755, "grad_norm": 1.5390625, "learning_rate": 0.0019807084756394665, "loss": 0.3164, "step": 7274 }, { "epoch": 0.012900949397107571, "grad_norm": 0.255859375, "learning_rate": 0.001980696216542875, "loss": 0.2405, "step": 7276 }, { "epoch": 0.012904495562417385, "grad_norm": 0.9140625, "learning_rate": 0.0019806839535946295, "loss": 0.2778, "step": 7278 }, { "epoch": 0.0129080417277272, "grad_norm": 0.2470703125, "learning_rate": 0.001980671686794784, "loss": 0.2086, "step": 7280 }, { "epoch": 0.012911587893037016, "grad_norm": 0.30078125, "learning_rate": 0.0019806594161433924, "loss": 0.3295, "step": 7282 }, { "epoch": 0.01291513405834683, "grad_norm": 1.0078125, "learning_rate": 0.001980647141640508, "loss": 0.301, "step": 7284 }, { "epoch": 0.012918680223656645, "grad_norm": 0.439453125, "learning_rate": 0.001980634863286184, "loss": 0.3033, "step": 7286 }, { "epoch": 0.012922226388966462, "grad_norm": 0.41796875, "learning_rate": 0.0019806225810804754, "loss": 0.2387, "step": 7288 }, { "epoch": 0.012925772554276276, "grad_norm": 0.2216796875, "learning_rate": 0.0019806102950234348, "loss": 0.2494, "step": 7290 }, { "epoch": 0.01292931871958609, "grad_norm": 0.3671875, "learning_rate": 0.0019805980051151163, "loss": 0.2023, "step": 7292 }, { "epoch": 0.012932864884895907, "grad_norm": 0.4453125, "learning_rate": 0.001980585711355574, "loss": 0.2692, "step": 7294 }, { "epoch": 0.012936411050205722, "grad_norm": 0.296875, "learning_rate": 0.0019805734137448607, "loss": 0.2295, "step": 7296 }, { "epoch": 0.012939957215515538, "grad_norm": 0.265625, "learning_rate": 0.001980561112283031, "loss": 0.2222, "step": 7298 }, { "epoch": 0.012943503380825352, "grad_norm": 0.416015625, "learning_rate": 0.0019805488069701387, "loss": 0.3077, "step": 7300 }, { "epoch": 0.012947049546135167, "grad_norm": 0.4453125, "learning_rate": 0.001980536497806237, "loss": 0.2136, "step": 7302 }, { "epoch": 0.012950595711444983, "grad_norm": 0.65625, "learning_rate": 0.0019805241847913805, "loss": 0.257, "step": 7304 }, { "epoch": 0.012954141876754798, "grad_norm": 0.890625, "learning_rate": 0.0019805118679256223, "loss": 0.2409, "step": 7306 }, { "epoch": 0.012957688042064612, "grad_norm": 3.328125, "learning_rate": 0.001980499547209017, "loss": 0.4833, "step": 7308 }, { "epoch": 0.012961234207374429, "grad_norm": 0.2421875, "learning_rate": 0.0019804872226416178, "loss": 0.2097, "step": 7310 }, { "epoch": 0.012964780372684243, "grad_norm": 0.58203125, "learning_rate": 0.0019804748942234794, "loss": 0.3131, "step": 7312 }, { "epoch": 0.012968326537994058, "grad_norm": 0.3984375, "learning_rate": 0.0019804625619546552, "loss": 0.2601, "step": 7314 }, { "epoch": 0.012971872703303874, "grad_norm": 1.21875, "learning_rate": 0.001980450225835199, "loss": 0.2731, "step": 7316 }, { "epoch": 0.012975418868613689, "grad_norm": 0.59375, "learning_rate": 0.001980437885865165, "loss": 0.4425, "step": 7318 }, { "epoch": 0.012978965033923503, "grad_norm": 2.328125, "learning_rate": 0.0019804255420446067, "loss": 0.2614, "step": 7320 }, { "epoch": 0.01298251119923332, "grad_norm": 0.5625, "learning_rate": 0.001980413194373579, "loss": 0.4741, "step": 7322 }, { "epoch": 0.012986057364543134, "grad_norm": 0.455078125, "learning_rate": 0.001980400842852135, "loss": 0.2157, "step": 7324 }, { "epoch": 0.012989603529852949, "grad_norm": 0.3828125, "learning_rate": 0.0019803884874803296, "loss": 0.2669, "step": 7326 }, { "epoch": 0.012993149695162765, "grad_norm": 0.40234375, "learning_rate": 0.0019803761282582164, "loss": 0.209, "step": 7328 }, { "epoch": 0.01299669586047258, "grad_norm": 0.37109375, "learning_rate": 0.001980363765185849, "loss": 0.3217, "step": 7330 }, { "epoch": 0.013000242025782396, "grad_norm": 1.71875, "learning_rate": 0.0019803513982632817, "loss": 0.2535, "step": 7332 }, { "epoch": 0.01300378819109221, "grad_norm": 0.86328125, "learning_rate": 0.001980339027490569, "loss": 0.2472, "step": 7334 }, { "epoch": 0.013007334356402025, "grad_norm": 1.578125, "learning_rate": 0.0019803266528677648, "loss": 0.4076, "step": 7336 }, { "epoch": 0.013010880521711841, "grad_norm": 0.341796875, "learning_rate": 0.0019803142743949234, "loss": 0.1899, "step": 7338 }, { "epoch": 0.013014426687021656, "grad_norm": 0.185546875, "learning_rate": 0.0019803018920720983, "loss": 0.239, "step": 7340 }, { "epoch": 0.01301797285233147, "grad_norm": 1.640625, "learning_rate": 0.001980289505899344, "loss": 0.3933, "step": 7342 }, { "epoch": 0.013021519017641286, "grad_norm": 0.380859375, "learning_rate": 0.0019802771158767152, "loss": 0.2236, "step": 7344 }, { "epoch": 0.013025065182951101, "grad_norm": 0.28125, "learning_rate": 0.001980264722004265, "loss": 0.2797, "step": 7346 }, { "epoch": 0.013028611348260916, "grad_norm": 0.46875, "learning_rate": 0.0019802523242820488, "loss": 0.5111, "step": 7348 }, { "epoch": 0.013032157513570732, "grad_norm": 0.4765625, "learning_rate": 0.0019802399227101205, "loss": 0.293, "step": 7350 }, { "epoch": 0.013035703678880546, "grad_norm": 0.8828125, "learning_rate": 0.0019802275172885334, "loss": 0.279, "step": 7352 }, { "epoch": 0.013039249844190361, "grad_norm": 1.6171875, "learning_rate": 0.0019802151080173425, "loss": 0.3866, "step": 7354 }, { "epoch": 0.013042796009500177, "grad_norm": 0.53125, "learning_rate": 0.0019802026948966024, "loss": 0.2419, "step": 7356 }, { "epoch": 0.013046342174809992, "grad_norm": 0.4375, "learning_rate": 0.001980190277926367, "loss": 0.3264, "step": 7358 }, { "epoch": 0.013049888340119806, "grad_norm": 0.6171875, "learning_rate": 0.0019801778571066904, "loss": 0.2487, "step": 7360 }, { "epoch": 0.013053434505429623, "grad_norm": 0.33203125, "learning_rate": 0.001980165432437627, "loss": 0.2401, "step": 7362 }, { "epoch": 0.013056980670739437, "grad_norm": 0.515625, "learning_rate": 0.0019801530039192314, "loss": 0.3544, "step": 7364 }, { "epoch": 0.013060526836049254, "grad_norm": 0.73046875, "learning_rate": 0.0019801405715515574, "loss": 0.3347, "step": 7366 }, { "epoch": 0.013064073001359068, "grad_norm": 0.83984375, "learning_rate": 0.0019801281353346604, "loss": 0.2402, "step": 7368 }, { "epoch": 0.013067619166668883, "grad_norm": 1.140625, "learning_rate": 0.0019801156952685937, "loss": 0.2637, "step": 7370 }, { "epoch": 0.013071165331978699, "grad_norm": 0.322265625, "learning_rate": 0.0019801032513534125, "loss": 0.2193, "step": 7372 }, { "epoch": 0.013074711497288513, "grad_norm": 2.296875, "learning_rate": 0.001980090803589171, "loss": 0.2314, "step": 7374 }, { "epoch": 0.013078257662598328, "grad_norm": 0.921875, "learning_rate": 0.001980078351975923, "loss": 0.2706, "step": 7376 }, { "epoch": 0.013081803827908144, "grad_norm": 0.2275390625, "learning_rate": 0.001980065896513724, "loss": 0.2573, "step": 7378 }, { "epoch": 0.013085349993217959, "grad_norm": 1.8984375, "learning_rate": 0.001980053437202628, "loss": 0.3453, "step": 7380 }, { "epoch": 0.013088896158527773, "grad_norm": 7.6875, "learning_rate": 0.001980040974042689, "loss": 0.3178, "step": 7382 }, { "epoch": 0.01309244232383759, "grad_norm": 0.55078125, "learning_rate": 0.0019800285070339626, "loss": 0.291, "step": 7384 }, { "epoch": 0.013095988489147404, "grad_norm": 0.53125, "learning_rate": 0.001980016036176502, "loss": 0.1913, "step": 7386 }, { "epoch": 0.013099534654457219, "grad_norm": 0.373046875, "learning_rate": 0.001980003561470363, "loss": 0.1708, "step": 7388 }, { "epoch": 0.013103080819767035, "grad_norm": 1.65625, "learning_rate": 0.0019799910829155993, "loss": 0.2892, "step": 7390 }, { "epoch": 0.01310662698507685, "grad_norm": 1.4375, "learning_rate": 0.001979978600512266, "loss": 0.2598, "step": 7392 }, { "epoch": 0.013110173150386664, "grad_norm": 0.30859375, "learning_rate": 0.001979966114260417, "loss": 0.2125, "step": 7394 }, { "epoch": 0.01311371931569648, "grad_norm": 0.66796875, "learning_rate": 0.0019799536241601077, "loss": 0.2582, "step": 7396 }, { "epoch": 0.013117265481006295, "grad_norm": 0.8828125, "learning_rate": 0.0019799411302113927, "loss": 0.2937, "step": 7398 }, { "epoch": 0.013120811646316111, "grad_norm": 0.306640625, "learning_rate": 0.001979928632414326, "loss": 0.3104, "step": 7400 }, { "epoch": 0.013124357811625926, "grad_norm": 1.125, "learning_rate": 0.0019799161307689625, "loss": 0.2228, "step": 7402 }, { "epoch": 0.01312790397693574, "grad_norm": 0.57421875, "learning_rate": 0.001979903625275357, "loss": 0.3244, "step": 7404 }, { "epoch": 0.013131450142245557, "grad_norm": 0.359375, "learning_rate": 0.001979891115933564, "loss": 0.202, "step": 7406 }, { "epoch": 0.013134996307555371, "grad_norm": 0.349609375, "learning_rate": 0.0019798786027436384, "loss": 0.246, "step": 7408 }, { "epoch": 0.013138542472865186, "grad_norm": 0.51953125, "learning_rate": 0.001979866085705635, "loss": 0.2544, "step": 7410 }, { "epoch": 0.013142088638175002, "grad_norm": 0.5078125, "learning_rate": 0.0019798535648196084, "loss": 0.1871, "step": 7412 }, { "epoch": 0.013145634803484817, "grad_norm": 1.5625, "learning_rate": 0.0019798410400856136, "loss": 0.4191, "step": 7414 }, { "epoch": 0.013149180968794631, "grad_norm": 0.6953125, "learning_rate": 0.0019798285115037047, "loss": 0.3978, "step": 7416 }, { "epoch": 0.013152727134104448, "grad_norm": 1.2734375, "learning_rate": 0.0019798159790739376, "loss": 0.4776, "step": 7418 }, { "epoch": 0.013156273299414262, "grad_norm": 0.4375, "learning_rate": 0.001979803442796366, "loss": 0.2267, "step": 7420 }, { "epoch": 0.013159819464724077, "grad_norm": 0.458984375, "learning_rate": 0.001979790902671045, "loss": 0.2982, "step": 7422 }, { "epoch": 0.013163365630033893, "grad_norm": 0.8828125, "learning_rate": 0.0019797783586980302, "loss": 0.3043, "step": 7424 }, { "epoch": 0.013166911795343707, "grad_norm": 1.078125, "learning_rate": 0.0019797658108773753, "loss": 0.3142, "step": 7426 }, { "epoch": 0.013170457960653522, "grad_norm": 0.4296875, "learning_rate": 0.0019797532592091367, "loss": 0.2354, "step": 7428 }, { "epoch": 0.013174004125963338, "grad_norm": 0.361328125, "learning_rate": 0.0019797407036933677, "loss": 0.406, "step": 7430 }, { "epoch": 0.013177550291273153, "grad_norm": 1.2734375, "learning_rate": 0.001979728144330124, "loss": 0.2033, "step": 7432 }, { "epoch": 0.01318109645658297, "grad_norm": 1.2890625, "learning_rate": 0.0019797155811194602, "loss": 0.2822, "step": 7434 }, { "epoch": 0.013184642621892784, "grad_norm": 0.640625, "learning_rate": 0.0019797030140614315, "loss": 0.2742, "step": 7436 }, { "epoch": 0.013188188787202598, "grad_norm": 1.6328125, "learning_rate": 0.001979690443156093, "loss": 0.5442, "step": 7438 }, { "epoch": 0.013191734952512415, "grad_norm": 0.474609375, "learning_rate": 0.001979677868403499, "loss": 0.2336, "step": 7440 }, { "epoch": 0.013195281117822229, "grad_norm": 0.349609375, "learning_rate": 0.0019796652898037056, "loss": 0.2037, "step": 7442 }, { "epoch": 0.013198827283132044, "grad_norm": 0.341796875, "learning_rate": 0.001979652707356767, "loss": 0.2163, "step": 7444 }, { "epoch": 0.01320237344844186, "grad_norm": 0.310546875, "learning_rate": 0.0019796401210627384, "loss": 0.1865, "step": 7446 }, { "epoch": 0.013205919613751674, "grad_norm": 0.92578125, "learning_rate": 0.0019796275309216745, "loss": 0.4165, "step": 7448 }, { "epoch": 0.013209465779061489, "grad_norm": 0.404296875, "learning_rate": 0.0019796149369336316, "loss": 0.2459, "step": 7450 }, { "epoch": 0.013213011944371305, "grad_norm": 3.234375, "learning_rate": 0.0019796023390986632, "loss": 0.7228, "step": 7452 }, { "epoch": 0.01321655810968112, "grad_norm": 0.3984375, "learning_rate": 0.0019795897374168254, "loss": 0.281, "step": 7454 }, { "epoch": 0.013220104274990934, "grad_norm": 0.455078125, "learning_rate": 0.001979577131888173, "loss": 0.2723, "step": 7456 }, { "epoch": 0.01322365044030075, "grad_norm": 0.734375, "learning_rate": 0.0019795645225127606, "loss": 0.2263, "step": 7458 }, { "epoch": 0.013227196605610565, "grad_norm": 0.4140625, "learning_rate": 0.0019795519092906445, "loss": 0.2266, "step": 7460 }, { "epoch": 0.01323074277092038, "grad_norm": 2.515625, "learning_rate": 0.0019795392922218793, "loss": 0.3231, "step": 7462 }, { "epoch": 0.013234288936230196, "grad_norm": 0.40625, "learning_rate": 0.00197952667130652, "loss": 0.2287, "step": 7464 }, { "epoch": 0.01323783510154001, "grad_norm": 0.443359375, "learning_rate": 0.0019795140465446214, "loss": 0.2105, "step": 7466 }, { "epoch": 0.013241381266849827, "grad_norm": 1.09375, "learning_rate": 0.00197950141793624, "loss": 0.3577, "step": 7468 }, { "epoch": 0.013244927432159641, "grad_norm": 0.97265625, "learning_rate": 0.00197948878548143, "loss": 0.3316, "step": 7470 }, { "epoch": 0.013248473597469456, "grad_norm": 1.203125, "learning_rate": 0.0019794761491802467, "loss": 0.316, "step": 7472 }, { "epoch": 0.013252019762779272, "grad_norm": 0.318359375, "learning_rate": 0.001979463509032746, "loss": 0.2238, "step": 7474 }, { "epoch": 0.013255565928089087, "grad_norm": 1.515625, "learning_rate": 0.0019794508650389825, "loss": 0.2358, "step": 7476 }, { "epoch": 0.013259112093398901, "grad_norm": 1.0703125, "learning_rate": 0.0019794382171990114, "loss": 0.311, "step": 7478 }, { "epoch": 0.013262658258708718, "grad_norm": 0.298828125, "learning_rate": 0.001979425565512889, "loss": 0.3175, "step": 7480 }, { "epoch": 0.013266204424018532, "grad_norm": 0.41796875, "learning_rate": 0.00197941290998067, "loss": 0.2808, "step": 7482 }, { "epoch": 0.013269750589328347, "grad_norm": 0.78125, "learning_rate": 0.0019794002506024096, "loss": 0.1972, "step": 7484 }, { "epoch": 0.013273296754638163, "grad_norm": 0.27734375, "learning_rate": 0.001979387587378163, "loss": 0.1635, "step": 7486 }, { "epoch": 0.013276842919947978, "grad_norm": 1.0703125, "learning_rate": 0.0019793749203079864, "loss": 0.3265, "step": 7488 }, { "epoch": 0.013280389085257792, "grad_norm": 2.90625, "learning_rate": 0.0019793622493919343, "loss": 0.2144, "step": 7490 }, { "epoch": 0.013283935250567609, "grad_norm": 0.349609375, "learning_rate": 0.0019793495746300623, "loss": 0.245, "step": 7492 }, { "epoch": 0.013287481415877423, "grad_norm": 0.466796875, "learning_rate": 0.0019793368960224268, "loss": 0.2353, "step": 7494 }, { "epoch": 0.013291027581187238, "grad_norm": 0.546875, "learning_rate": 0.0019793242135690823, "loss": 0.2539, "step": 7496 }, { "epoch": 0.013294573746497054, "grad_norm": 0.431640625, "learning_rate": 0.001979311527270084, "loss": 0.2414, "step": 7498 }, { "epoch": 0.013298119911806868, "grad_norm": 0.75390625, "learning_rate": 0.0019792988371254883, "loss": 0.3139, "step": 7500 }, { "epoch": 0.013301666077116685, "grad_norm": 0.302734375, "learning_rate": 0.0019792861431353497, "loss": 0.2354, "step": 7502 }, { "epoch": 0.0133052122424265, "grad_norm": 0.451171875, "learning_rate": 0.0019792734452997248, "loss": 0.1684, "step": 7504 }, { "epoch": 0.013308758407736314, "grad_norm": 0.55078125, "learning_rate": 0.0019792607436186684, "loss": 0.2232, "step": 7506 }, { "epoch": 0.01331230457304613, "grad_norm": 0.51953125, "learning_rate": 0.0019792480380922367, "loss": 0.1877, "step": 7508 }, { "epoch": 0.013315850738355945, "grad_norm": 0.91015625, "learning_rate": 0.001979235328720484, "loss": 0.2452, "step": 7510 }, { "epoch": 0.01331939690366576, "grad_norm": 0.5078125, "learning_rate": 0.0019792226155034673, "loss": 0.3374, "step": 7512 }, { "epoch": 0.013322943068975576, "grad_norm": 0.53125, "learning_rate": 0.001979209898441241, "loss": 0.2133, "step": 7514 }, { "epoch": 0.01332648923428539, "grad_norm": 0.458984375, "learning_rate": 0.0019791971775338616, "loss": 0.2446, "step": 7516 }, { "epoch": 0.013330035399595205, "grad_norm": 0.337890625, "learning_rate": 0.0019791844527813846, "loss": 0.201, "step": 7518 }, { "epoch": 0.013333581564905021, "grad_norm": 0.458984375, "learning_rate": 0.0019791717241838657, "loss": 0.2199, "step": 7520 }, { "epoch": 0.013337127730214835, "grad_norm": 0.34375, "learning_rate": 0.00197915899174136, "loss": 0.2111, "step": 7522 }, { "epoch": 0.01334067389552465, "grad_norm": 1.3671875, "learning_rate": 0.001979146255453924, "loss": 0.2488, "step": 7524 }, { "epoch": 0.013344220060834466, "grad_norm": 0.416015625, "learning_rate": 0.0019791335153216123, "loss": 0.3305, "step": 7526 }, { "epoch": 0.013347766226144281, "grad_norm": 1.5234375, "learning_rate": 0.0019791207713444814, "loss": 0.273, "step": 7528 }, { "epoch": 0.013351312391454095, "grad_norm": 0.3046875, "learning_rate": 0.001979108023522587, "loss": 0.1712, "step": 7530 }, { "epoch": 0.013354858556763912, "grad_norm": 0.271484375, "learning_rate": 0.0019790952718559845, "loss": 0.1873, "step": 7532 }, { "epoch": 0.013358404722073726, "grad_norm": 3.9375, "learning_rate": 0.0019790825163447305, "loss": 0.2767, "step": 7534 }, { "epoch": 0.013361950887383543, "grad_norm": 2.265625, "learning_rate": 0.00197906975698888, "loss": 0.5543, "step": 7536 }, { "epoch": 0.013365497052693357, "grad_norm": 0.431640625, "learning_rate": 0.001979056993788489, "loss": 0.2476, "step": 7538 }, { "epoch": 0.013369043218003172, "grad_norm": 0.50390625, "learning_rate": 0.001979044226743613, "loss": 0.2713, "step": 7540 }, { "epoch": 0.013372589383312988, "grad_norm": 0.73046875, "learning_rate": 0.0019790314558543087, "loss": 0.4439, "step": 7542 }, { "epoch": 0.013376135548622803, "grad_norm": 0.9140625, "learning_rate": 0.001979018681120631, "loss": 0.2361, "step": 7544 }, { "epoch": 0.013379681713932617, "grad_norm": 0.66015625, "learning_rate": 0.001979005902542636, "loss": 0.286, "step": 7546 }, { "epoch": 0.013383227879242433, "grad_norm": 0.45703125, "learning_rate": 0.0019789931201203803, "loss": 0.224, "step": 7548 }, { "epoch": 0.013386774044552248, "grad_norm": 0.54296875, "learning_rate": 0.0019789803338539193, "loss": 0.2424, "step": 7550 }, { "epoch": 0.013390320209862062, "grad_norm": 0.71484375, "learning_rate": 0.0019789675437433085, "loss": 0.2961, "step": 7552 }, { "epoch": 0.013393866375171879, "grad_norm": 0.6484375, "learning_rate": 0.0019789547497886043, "loss": 0.2981, "step": 7554 }, { "epoch": 0.013397412540481693, "grad_norm": 0.6953125, "learning_rate": 0.0019789419519898625, "loss": 0.4272, "step": 7556 }, { "epoch": 0.013400958705791508, "grad_norm": 0.71484375, "learning_rate": 0.001978929150347139, "loss": 0.2879, "step": 7558 }, { "epoch": 0.013404504871101324, "grad_norm": 0.5, "learning_rate": 0.0019789163448604907, "loss": 0.2423, "step": 7560 }, { "epoch": 0.013408051036411139, "grad_norm": 0.8984375, "learning_rate": 0.0019789035355299725, "loss": 0.2264, "step": 7562 }, { "epoch": 0.013411597201720953, "grad_norm": 0.3046875, "learning_rate": 0.0019788907223556407, "loss": 0.1948, "step": 7564 }, { "epoch": 0.01341514336703077, "grad_norm": 0.3046875, "learning_rate": 0.001978877905337551, "loss": 0.2547, "step": 7566 }, { "epoch": 0.013418689532340584, "grad_norm": 0.84375, "learning_rate": 0.0019788650844757604, "loss": 0.2852, "step": 7568 }, { "epoch": 0.0134222356976504, "grad_norm": 0.462890625, "learning_rate": 0.0019788522597703243, "loss": 0.2168, "step": 7570 }, { "epoch": 0.013425781862960215, "grad_norm": 0.439453125, "learning_rate": 0.0019788394312212987, "loss": 0.219, "step": 7572 }, { "epoch": 0.01342932802827003, "grad_norm": 0.236328125, "learning_rate": 0.00197882659882874, "loss": 0.2216, "step": 7574 }, { "epoch": 0.013432874193579846, "grad_norm": 0.2412109375, "learning_rate": 0.0019788137625927045, "loss": 0.2031, "step": 7576 }, { "epoch": 0.01343642035888966, "grad_norm": 5.15625, "learning_rate": 0.001978800922513248, "loss": 0.3581, "step": 7578 }, { "epoch": 0.013439966524199475, "grad_norm": 0.94140625, "learning_rate": 0.0019787880785904263, "loss": 0.2773, "step": 7580 }, { "epoch": 0.013443512689509291, "grad_norm": 0.68359375, "learning_rate": 0.0019787752308242966, "loss": 0.2136, "step": 7582 }, { "epoch": 0.013447058854819106, "grad_norm": 0.37109375, "learning_rate": 0.001978762379214914, "loss": 0.2067, "step": 7584 }, { "epoch": 0.01345060502012892, "grad_norm": 0.83984375, "learning_rate": 0.0019787495237623353, "loss": 0.2701, "step": 7586 }, { "epoch": 0.013454151185438737, "grad_norm": 0.84765625, "learning_rate": 0.0019787366644666167, "loss": 0.246, "step": 7588 }, { "epoch": 0.013457697350748551, "grad_norm": 1.0390625, "learning_rate": 0.001978723801327815, "loss": 0.4068, "step": 7590 }, { "epoch": 0.013461243516058366, "grad_norm": 0.39453125, "learning_rate": 0.001978710934345985, "loss": 0.1986, "step": 7592 }, { "epoch": 0.013464789681368182, "grad_norm": 0.306640625, "learning_rate": 0.001978698063521184, "loss": 0.2285, "step": 7594 }, { "epoch": 0.013468335846677996, "grad_norm": 1.5859375, "learning_rate": 0.001978685188853468, "loss": 0.4398, "step": 7596 }, { "epoch": 0.013471882011987811, "grad_norm": 0.41015625, "learning_rate": 0.001978672310342893, "loss": 0.4547, "step": 7598 }, { "epoch": 0.013475428177297627, "grad_norm": 0.474609375, "learning_rate": 0.0019786594279895165, "loss": 0.2128, "step": 7600 }, { "epoch": 0.013478974342607442, "grad_norm": 0.81640625, "learning_rate": 0.0019786465417933937, "loss": 0.2281, "step": 7602 }, { "epoch": 0.013482520507917258, "grad_norm": 0.53125, "learning_rate": 0.0019786336517545813, "loss": 0.3014, "step": 7604 }, { "epoch": 0.013486066673227073, "grad_norm": 6.125, "learning_rate": 0.0019786207578731357, "loss": 0.2493, "step": 7606 }, { "epoch": 0.013489612838536887, "grad_norm": 0.56640625, "learning_rate": 0.0019786078601491132, "loss": 0.2138, "step": 7608 }, { "epoch": 0.013493159003846704, "grad_norm": 0.384765625, "learning_rate": 0.0019785949585825707, "loss": 0.3604, "step": 7610 }, { "epoch": 0.013496705169156518, "grad_norm": 1.4921875, "learning_rate": 0.0019785820531735636, "loss": 0.2503, "step": 7612 }, { "epoch": 0.013500251334466333, "grad_norm": 0.46875, "learning_rate": 0.0019785691439221493, "loss": 0.2542, "step": 7614 }, { "epoch": 0.013503797499776149, "grad_norm": 0.546875, "learning_rate": 0.0019785562308283836, "loss": 0.2516, "step": 7616 }, { "epoch": 0.013507343665085964, "grad_norm": 0.251953125, "learning_rate": 0.0019785433138923233, "loss": 0.2466, "step": 7618 }, { "epoch": 0.013510889830395778, "grad_norm": 0.37890625, "learning_rate": 0.0019785303931140253, "loss": 0.3439, "step": 7620 }, { "epoch": 0.013514435995705594, "grad_norm": 0.357421875, "learning_rate": 0.0019785174684935456, "loss": 0.2255, "step": 7622 }, { "epoch": 0.013517982161015409, "grad_norm": 0.376953125, "learning_rate": 0.0019785045400309404, "loss": 0.3415, "step": 7624 }, { "epoch": 0.013521528326325223, "grad_norm": 0.68359375, "learning_rate": 0.0019784916077262666, "loss": 0.2489, "step": 7626 }, { "epoch": 0.01352507449163504, "grad_norm": 0.484375, "learning_rate": 0.001978478671579581, "loss": 0.2358, "step": 7628 }, { "epoch": 0.013528620656944854, "grad_norm": 0.53125, "learning_rate": 0.00197846573159094, "loss": 0.2224, "step": 7630 }, { "epoch": 0.013532166822254669, "grad_norm": 3.578125, "learning_rate": 0.0019784527877604, "loss": 0.2525, "step": 7632 }, { "epoch": 0.013535712987564485, "grad_norm": 1.6171875, "learning_rate": 0.001978439840088018, "loss": 0.5032, "step": 7634 }, { "epoch": 0.0135392591528743, "grad_norm": 0.57421875, "learning_rate": 0.001978426888573851, "loss": 0.2922, "step": 7636 }, { "epoch": 0.013542805318184116, "grad_norm": 0.55078125, "learning_rate": 0.001978413933217954, "loss": 0.354, "step": 7638 }, { "epoch": 0.01354635148349393, "grad_norm": 1.5234375, "learning_rate": 0.001978400974020385, "loss": 0.53, "step": 7640 }, { "epoch": 0.013549897648803745, "grad_norm": 0.49609375, "learning_rate": 0.0019783880109812005, "loss": 0.222, "step": 7642 }, { "epoch": 0.013553443814113561, "grad_norm": 0.353515625, "learning_rate": 0.001978375044100457, "loss": 0.254, "step": 7644 }, { "epoch": 0.013556989979423376, "grad_norm": 0.59765625, "learning_rate": 0.0019783620733782115, "loss": 0.189, "step": 7646 }, { "epoch": 0.01356053614473319, "grad_norm": 0.53125, "learning_rate": 0.0019783490988145203, "loss": 0.3683, "step": 7648 }, { "epoch": 0.013564082310043007, "grad_norm": 0.455078125, "learning_rate": 0.00197833612040944, "loss": 0.2526, "step": 7650 }, { "epoch": 0.013567628475352821, "grad_norm": 0.267578125, "learning_rate": 0.001978323138163028, "loss": 0.2494, "step": 7652 }, { "epoch": 0.013571174640662636, "grad_norm": 0.6484375, "learning_rate": 0.001978310152075341, "loss": 0.3841, "step": 7654 }, { "epoch": 0.013574720805972452, "grad_norm": 1.1328125, "learning_rate": 0.0019782971621464356, "loss": 0.3076, "step": 7656 }, { "epoch": 0.013578266971282267, "grad_norm": 0.330078125, "learning_rate": 0.0019782841683763683, "loss": 0.2062, "step": 7658 }, { "epoch": 0.013581813136592081, "grad_norm": 0.73046875, "learning_rate": 0.0019782711707651965, "loss": 0.3023, "step": 7660 }, { "epoch": 0.013585359301901898, "grad_norm": 0.5078125, "learning_rate": 0.0019782581693129765, "loss": 0.2293, "step": 7662 }, { "epoch": 0.013588905467211712, "grad_norm": 1.03125, "learning_rate": 0.0019782451640197652, "loss": 0.2587, "step": 7664 }, { "epoch": 0.013592451632521527, "grad_norm": 0.4296875, "learning_rate": 0.0019782321548856203, "loss": 0.2545, "step": 7666 }, { "epoch": 0.013595997797831343, "grad_norm": 1.0390625, "learning_rate": 0.0019782191419105977, "loss": 0.2582, "step": 7668 }, { "epoch": 0.013599543963141158, "grad_norm": 0.322265625, "learning_rate": 0.001978206125094755, "loss": 0.1954, "step": 7670 }, { "epoch": 0.013603090128450974, "grad_norm": 0.47265625, "learning_rate": 0.0019781931044381483, "loss": 0.2613, "step": 7672 }, { "epoch": 0.013606636293760788, "grad_norm": 0.359375, "learning_rate": 0.0019781800799408356, "loss": 0.3365, "step": 7674 }, { "epoch": 0.013610182459070603, "grad_norm": 0.39453125, "learning_rate": 0.0019781670516028733, "loss": 0.1916, "step": 7676 }, { "epoch": 0.01361372862438042, "grad_norm": 0.5546875, "learning_rate": 0.001978154019424318, "loss": 0.2629, "step": 7678 }, { "epoch": 0.013617274789690234, "grad_norm": 0.75, "learning_rate": 0.0019781409834052272, "loss": 0.2863, "step": 7680 }, { "epoch": 0.013620820955000048, "grad_norm": 0.322265625, "learning_rate": 0.0019781279435456584, "loss": 0.2385, "step": 7682 }, { "epoch": 0.013624367120309865, "grad_norm": 0.3984375, "learning_rate": 0.001978114899845667, "loss": 0.2907, "step": 7684 }, { "epoch": 0.013627913285619679, "grad_norm": 0.46875, "learning_rate": 0.001978101852305312, "loss": 0.2181, "step": 7686 }, { "epoch": 0.013631459450929494, "grad_norm": 0.80859375, "learning_rate": 0.0019780888009246493, "loss": 0.2996, "step": 7688 }, { "epoch": 0.01363500561623931, "grad_norm": 0.421875, "learning_rate": 0.0019780757457037363, "loss": 0.2227, "step": 7690 }, { "epoch": 0.013638551781549125, "grad_norm": 0.5078125, "learning_rate": 0.00197806268664263, "loss": 0.2362, "step": 7692 }, { "epoch": 0.013642097946858939, "grad_norm": 0.369140625, "learning_rate": 0.0019780496237413875, "loss": 0.2549, "step": 7694 }, { "epoch": 0.013645644112168755, "grad_norm": 0.58203125, "learning_rate": 0.001978036557000066, "loss": 0.2283, "step": 7696 }, { "epoch": 0.01364919027747857, "grad_norm": 0.3125, "learning_rate": 0.0019780234864187228, "loss": 0.4069, "step": 7698 }, { "epoch": 0.013652736442788384, "grad_norm": 1.625, "learning_rate": 0.0019780104119974146, "loss": 0.2306, "step": 7700 }, { "epoch": 0.0136562826080982, "grad_norm": 1.734375, "learning_rate": 0.001977997333736199, "loss": 0.2526, "step": 7702 }, { "epoch": 0.013659828773408015, "grad_norm": 0.333984375, "learning_rate": 0.0019779842516351328, "loss": 0.2475, "step": 7704 }, { "epoch": 0.013663374938717832, "grad_norm": 0.255859375, "learning_rate": 0.0019779711656942736, "loss": 0.1859, "step": 7706 }, { "epoch": 0.013666921104027646, "grad_norm": 0.439453125, "learning_rate": 0.0019779580759136787, "loss": 0.2434, "step": 7708 }, { "epoch": 0.01367046726933746, "grad_norm": 2.21875, "learning_rate": 0.001977944982293405, "loss": 0.3137, "step": 7710 }, { "epoch": 0.013674013434647277, "grad_norm": 0.474609375, "learning_rate": 0.0019779318848335103, "loss": 0.2193, "step": 7712 }, { "epoch": 0.013677559599957092, "grad_norm": 0.37890625, "learning_rate": 0.0019779187835340514, "loss": 0.2377, "step": 7714 }, { "epoch": 0.013681105765266906, "grad_norm": 1.40625, "learning_rate": 0.001977905678395085, "loss": 0.2706, "step": 7716 }, { "epoch": 0.013684651930576722, "grad_norm": 0.337890625, "learning_rate": 0.0019778925694166703, "loss": 0.2012, "step": 7718 }, { "epoch": 0.013688198095886537, "grad_norm": 0.4140625, "learning_rate": 0.0019778794565988625, "loss": 0.3076, "step": 7720 }, { "epoch": 0.013691744261196351, "grad_norm": 0.88671875, "learning_rate": 0.00197786633994172, "loss": 0.2691, "step": 7722 }, { "epoch": 0.013695290426506168, "grad_norm": 0.271484375, "learning_rate": 0.0019778532194453004, "loss": 0.2075, "step": 7724 }, { "epoch": 0.013698836591815982, "grad_norm": 0.48828125, "learning_rate": 0.0019778400951096604, "loss": 0.5053, "step": 7726 }, { "epoch": 0.013702382757125797, "grad_norm": 1.71875, "learning_rate": 0.001977826966934858, "loss": 0.3419, "step": 7728 }, { "epoch": 0.013705928922435613, "grad_norm": 1.015625, "learning_rate": 0.0019778138349209502, "loss": 0.2519, "step": 7730 }, { "epoch": 0.013709475087745428, "grad_norm": 0.439453125, "learning_rate": 0.0019778006990679945, "loss": 0.212, "step": 7732 }, { "epoch": 0.013713021253055242, "grad_norm": 1.7109375, "learning_rate": 0.0019777875593760485, "loss": 0.2716, "step": 7734 }, { "epoch": 0.013716567418365059, "grad_norm": 2.34375, "learning_rate": 0.0019777744158451698, "loss": 0.225, "step": 7736 }, { "epoch": 0.013720113583674873, "grad_norm": 0.66796875, "learning_rate": 0.0019777612684754153, "loss": 0.273, "step": 7738 }, { "epoch": 0.01372365974898469, "grad_norm": 0.72265625, "learning_rate": 0.001977748117266843, "loss": 0.3076, "step": 7740 }, { "epoch": 0.013727205914294504, "grad_norm": 0.455078125, "learning_rate": 0.0019777349622195103, "loss": 0.2543, "step": 7742 }, { "epoch": 0.013730752079604319, "grad_norm": 0.703125, "learning_rate": 0.001977721803333475, "loss": 0.2176, "step": 7744 }, { "epoch": 0.013734298244914135, "grad_norm": 0.55078125, "learning_rate": 0.001977708640608794, "loss": 0.2822, "step": 7746 }, { "epoch": 0.01373784441022395, "grad_norm": 0.84765625, "learning_rate": 0.0019776954740455257, "loss": 0.2354, "step": 7748 }, { "epoch": 0.013741390575533764, "grad_norm": 2.609375, "learning_rate": 0.0019776823036437266, "loss": 0.2701, "step": 7750 }, { "epoch": 0.01374493674084358, "grad_norm": 2.34375, "learning_rate": 0.0019776691294034554, "loss": 0.3859, "step": 7752 }, { "epoch": 0.013748482906153395, "grad_norm": 0.56640625, "learning_rate": 0.001977655951324769, "loss": 0.306, "step": 7754 }, { "epoch": 0.01375202907146321, "grad_norm": 0.40625, "learning_rate": 0.0019776427694077254, "loss": 0.2485, "step": 7756 }, { "epoch": 0.013755575236773026, "grad_norm": 0.212890625, "learning_rate": 0.001977629583652382, "loss": 0.3498, "step": 7758 }, { "epoch": 0.01375912140208284, "grad_norm": 0.359375, "learning_rate": 0.0019776163940587966, "loss": 0.2204, "step": 7760 }, { "epoch": 0.013762667567392655, "grad_norm": 0.474609375, "learning_rate": 0.0019776032006270272, "loss": 0.1741, "step": 7762 }, { "epoch": 0.013766213732702471, "grad_norm": 0.609375, "learning_rate": 0.001977590003357131, "loss": 0.3222, "step": 7764 }, { "epoch": 0.013769759898012286, "grad_norm": 1.6796875, "learning_rate": 0.001977576802249166, "loss": 0.3199, "step": 7766 }, { "epoch": 0.0137733060633221, "grad_norm": 0.5, "learning_rate": 0.0019775635973031894, "loss": 0.2006, "step": 7768 }, { "epoch": 0.013776852228631916, "grad_norm": 1.0546875, "learning_rate": 0.0019775503885192595, "loss": 0.3139, "step": 7770 }, { "epoch": 0.013780398393941731, "grad_norm": 0.337890625, "learning_rate": 0.001977537175897434, "loss": 0.2011, "step": 7772 }, { "epoch": 0.013783944559251547, "grad_norm": 2.734375, "learning_rate": 0.0019775239594377707, "loss": 0.3913, "step": 7774 }, { "epoch": 0.013787490724561362, "grad_norm": 0.7265625, "learning_rate": 0.001977510739140327, "loss": 0.2466, "step": 7776 }, { "epoch": 0.013791036889871176, "grad_norm": 0.443359375, "learning_rate": 0.0019774975150051617, "loss": 0.3482, "step": 7778 }, { "epoch": 0.013794583055180993, "grad_norm": 1.484375, "learning_rate": 0.0019774842870323313, "loss": 0.3375, "step": 7780 }, { "epoch": 0.013798129220490807, "grad_norm": 0.8125, "learning_rate": 0.001977471055221895, "loss": 0.2647, "step": 7782 }, { "epoch": 0.013801675385800622, "grad_norm": 1.015625, "learning_rate": 0.0019774578195739098, "loss": 0.2908, "step": 7784 }, { "epoch": 0.013805221551110438, "grad_norm": 0.462890625, "learning_rate": 0.0019774445800884335, "loss": 0.2689, "step": 7786 }, { "epoch": 0.013808767716420253, "grad_norm": 0.4453125, "learning_rate": 0.0019774313367655243, "loss": 0.3056, "step": 7788 }, { "epoch": 0.013812313881730067, "grad_norm": 0.490234375, "learning_rate": 0.0019774180896052406, "loss": 0.1835, "step": 7790 }, { "epoch": 0.013815860047039883, "grad_norm": 0.375, "learning_rate": 0.0019774048386076394, "loss": 0.2325, "step": 7792 }, { "epoch": 0.013819406212349698, "grad_norm": 0.294921875, "learning_rate": 0.001977391583772779, "loss": 0.2261, "step": 7794 }, { "epoch": 0.013822952377659512, "grad_norm": 0.412109375, "learning_rate": 0.0019773783251007177, "loss": 0.2229, "step": 7796 }, { "epoch": 0.013826498542969329, "grad_norm": 0.73046875, "learning_rate": 0.001977365062591513, "loss": 0.2722, "step": 7798 }, { "epoch": 0.013830044708279143, "grad_norm": 0.94140625, "learning_rate": 0.0019773517962452234, "loss": 0.2715, "step": 7800 }, { "epoch": 0.013833590873588958, "grad_norm": 1.0390625, "learning_rate": 0.0019773385260619066, "loss": 0.2004, "step": 7802 }, { "epoch": 0.013837137038898774, "grad_norm": 0.5390625, "learning_rate": 0.001977325252041621, "loss": 0.3634, "step": 7804 }, { "epoch": 0.013840683204208589, "grad_norm": 0.87109375, "learning_rate": 0.001977311974184424, "loss": 0.2166, "step": 7806 }, { "epoch": 0.013844229369518405, "grad_norm": 1.7265625, "learning_rate": 0.001977298692490374, "loss": 0.3054, "step": 7808 }, { "epoch": 0.01384777553482822, "grad_norm": 0.5859375, "learning_rate": 0.001977285406959529, "loss": 0.313, "step": 7810 }, { "epoch": 0.013851321700138034, "grad_norm": 0.87109375, "learning_rate": 0.001977272117591948, "loss": 0.2316, "step": 7812 }, { "epoch": 0.01385486786544785, "grad_norm": 0.421875, "learning_rate": 0.0019772588243876874, "loss": 0.2951, "step": 7814 }, { "epoch": 0.013858414030757665, "grad_norm": 0.3046875, "learning_rate": 0.0019772455273468067, "loss": 0.2366, "step": 7816 }, { "epoch": 0.01386196019606748, "grad_norm": 0.294921875, "learning_rate": 0.0019772322264693634, "loss": 0.232, "step": 7818 }, { "epoch": 0.013865506361377296, "grad_norm": 2.3125, "learning_rate": 0.001977218921755416, "loss": 0.3022, "step": 7820 }, { "epoch": 0.01386905252668711, "grad_norm": 0.609375, "learning_rate": 0.001977205613205023, "loss": 0.2661, "step": 7822 }, { "epoch": 0.013872598691996925, "grad_norm": 1.3671875, "learning_rate": 0.0019771923008182414, "loss": 0.2097, "step": 7824 }, { "epoch": 0.013876144857306741, "grad_norm": 1.6328125, "learning_rate": 0.001977178984595131, "loss": 0.3737, "step": 7826 }, { "epoch": 0.013879691022616556, "grad_norm": 0.482421875, "learning_rate": 0.001977165664535749, "loss": 0.1889, "step": 7828 }, { "epoch": 0.01388323718792637, "grad_norm": 0.66796875, "learning_rate": 0.0019771523406401535, "loss": 0.592, "step": 7830 }, { "epoch": 0.013886783353236187, "grad_norm": 1.40625, "learning_rate": 0.0019771390129084037, "loss": 0.2485, "step": 7832 }, { "epoch": 0.013890329518546001, "grad_norm": 3.953125, "learning_rate": 0.001977125681340557, "loss": 0.351, "step": 7834 }, { "epoch": 0.013893875683855816, "grad_norm": 1.28125, "learning_rate": 0.001977112345936672, "loss": 0.3311, "step": 7836 }, { "epoch": 0.013897421849165632, "grad_norm": 1.3671875, "learning_rate": 0.0019770990066968076, "loss": 0.258, "step": 7838 }, { "epoch": 0.013900968014475447, "grad_norm": 0.30078125, "learning_rate": 0.001977085663621021, "loss": 0.2161, "step": 7840 }, { "epoch": 0.013904514179785261, "grad_norm": 0.61328125, "learning_rate": 0.0019770723167093717, "loss": 0.2532, "step": 7842 }, { "epoch": 0.013908060345095077, "grad_norm": 0.7578125, "learning_rate": 0.0019770589659619175, "loss": 0.2677, "step": 7844 }, { "epoch": 0.013911606510404892, "grad_norm": 1.0390625, "learning_rate": 0.0019770456113787165, "loss": 0.2911, "step": 7846 }, { "epoch": 0.013915152675714708, "grad_norm": 4.5625, "learning_rate": 0.0019770322529598277, "loss": 0.2344, "step": 7848 }, { "epoch": 0.013918698841024523, "grad_norm": 0.71484375, "learning_rate": 0.001977018890705309, "loss": 0.4691, "step": 7850 }, { "epoch": 0.013922245006334337, "grad_norm": 1.03125, "learning_rate": 0.0019770055246152193, "loss": 0.2439, "step": 7852 }, { "epoch": 0.013925791171644154, "grad_norm": 1.4140625, "learning_rate": 0.001976992154689617, "loss": 0.2845, "step": 7854 }, { "epoch": 0.013929337336953968, "grad_norm": 0.6015625, "learning_rate": 0.00197697878092856, "loss": 0.402, "step": 7856 }, { "epoch": 0.013932883502263783, "grad_norm": 0.2490234375, "learning_rate": 0.001976965403332108, "loss": 0.2305, "step": 7858 }, { "epoch": 0.013936429667573599, "grad_norm": 0.4765625, "learning_rate": 0.001976952021900318, "loss": 0.2462, "step": 7860 }, { "epoch": 0.013939975832883414, "grad_norm": 0.5078125, "learning_rate": 0.0019769386366332497, "loss": 0.2552, "step": 7862 }, { "epoch": 0.013943521998193228, "grad_norm": 1.8125, "learning_rate": 0.001976925247530961, "loss": 0.2692, "step": 7864 }, { "epoch": 0.013947068163503044, "grad_norm": 0.9375, "learning_rate": 0.0019769118545935106, "loss": 0.2507, "step": 7866 }, { "epoch": 0.013950614328812859, "grad_norm": 0.703125, "learning_rate": 0.001976898457820957, "loss": 0.2629, "step": 7868 }, { "epoch": 0.013954160494122674, "grad_norm": 0.92578125, "learning_rate": 0.0019768850572133593, "loss": 0.2842, "step": 7870 }, { "epoch": 0.01395770665943249, "grad_norm": 0.244140625, "learning_rate": 0.0019768716527707756, "loss": 0.2646, "step": 7872 }, { "epoch": 0.013961252824742304, "grad_norm": 0.431640625, "learning_rate": 0.0019768582444932644, "loss": 0.225, "step": 7874 }, { "epoch": 0.013964798990052119, "grad_norm": 0.396484375, "learning_rate": 0.0019768448323808844, "loss": 0.2122, "step": 7876 }, { "epoch": 0.013968345155361935, "grad_norm": 3.25, "learning_rate": 0.001976831416433695, "loss": 0.2633, "step": 7878 }, { "epoch": 0.01397189132067175, "grad_norm": 0.44140625, "learning_rate": 0.001976817996651754, "loss": 0.3261, "step": 7880 }, { "epoch": 0.013975437485981566, "grad_norm": 0.52734375, "learning_rate": 0.00197680457303512, "loss": 0.2683, "step": 7882 }, { "epoch": 0.01397898365129138, "grad_norm": 5.46875, "learning_rate": 0.0019767911455838526, "loss": 0.339, "step": 7884 }, { "epoch": 0.013982529816601195, "grad_norm": 0.396484375, "learning_rate": 0.00197677771429801, "loss": 0.2727, "step": 7886 }, { "epoch": 0.013986075981911011, "grad_norm": 0.380859375, "learning_rate": 0.001976764279177651, "loss": 0.2702, "step": 7888 }, { "epoch": 0.013989622147220826, "grad_norm": 0.85546875, "learning_rate": 0.0019767508402228347, "loss": 0.1854, "step": 7890 }, { "epoch": 0.01399316831253064, "grad_norm": 1.46875, "learning_rate": 0.0019767373974336187, "loss": 0.2519, "step": 7892 }, { "epoch": 0.013996714477840457, "grad_norm": 0.65625, "learning_rate": 0.001976723950810063, "loss": 0.2901, "step": 7894 }, { "epoch": 0.014000260643150271, "grad_norm": 0.423828125, "learning_rate": 0.001976710500352226, "loss": 0.2789, "step": 7896 }, { "epoch": 0.014003806808460086, "grad_norm": 0.462890625, "learning_rate": 0.0019766970460601664, "loss": 0.3878, "step": 7898 }, { "epoch": 0.014007352973769902, "grad_norm": 0.404296875, "learning_rate": 0.0019766835879339436, "loss": 0.2027, "step": 7900 }, { "epoch": 0.014010899139079717, "grad_norm": 0.56640625, "learning_rate": 0.0019766701259736155, "loss": 0.3059, "step": 7902 }, { "epoch": 0.014014445304389531, "grad_norm": 0.50390625, "learning_rate": 0.001976656660179242, "loss": 0.3784, "step": 7904 }, { "epoch": 0.014017991469699348, "grad_norm": 0.453125, "learning_rate": 0.0019766431905508808, "loss": 0.2456, "step": 7906 }, { "epoch": 0.014021537635009162, "grad_norm": 0.337890625, "learning_rate": 0.0019766297170885918, "loss": 0.2295, "step": 7908 }, { "epoch": 0.014025083800318977, "grad_norm": 0.41015625, "learning_rate": 0.001976616239792434, "loss": 0.2423, "step": 7910 }, { "epoch": 0.014028629965628793, "grad_norm": 0.8828125, "learning_rate": 0.0019766027586624654, "loss": 0.2471, "step": 7912 }, { "epoch": 0.014032176130938608, "grad_norm": 0.439453125, "learning_rate": 0.001976589273698746, "loss": 0.2488, "step": 7914 }, { "epoch": 0.014035722296248424, "grad_norm": 0.53125, "learning_rate": 0.001976575784901334, "loss": 0.2615, "step": 7916 }, { "epoch": 0.014039268461558238, "grad_norm": 0.7890625, "learning_rate": 0.001976562292270289, "loss": 0.2338, "step": 7918 }, { "epoch": 0.014042814626868053, "grad_norm": 0.296875, "learning_rate": 0.001976548795805669, "loss": 0.2023, "step": 7920 }, { "epoch": 0.01404636079217787, "grad_norm": 2.03125, "learning_rate": 0.0019765352955075344, "loss": 0.3934, "step": 7922 }, { "epoch": 0.014049906957487684, "grad_norm": 0.53515625, "learning_rate": 0.0019765217913759433, "loss": 0.2818, "step": 7924 }, { "epoch": 0.014053453122797498, "grad_norm": 1.296875, "learning_rate": 0.001976508283410955, "loss": 0.2061, "step": 7926 }, { "epoch": 0.014056999288107315, "grad_norm": 0.32421875, "learning_rate": 0.001976494771612629, "loss": 0.2473, "step": 7928 }, { "epoch": 0.01406054545341713, "grad_norm": 0.61328125, "learning_rate": 0.0019764812559810237, "loss": 0.2271, "step": 7930 }, { "epoch": 0.014064091618726944, "grad_norm": 2.28125, "learning_rate": 0.0019764677365161987, "loss": 0.2527, "step": 7932 }, { "epoch": 0.01406763778403676, "grad_norm": 1.125, "learning_rate": 0.0019764542132182125, "loss": 0.3249, "step": 7934 }, { "epoch": 0.014071183949346575, "grad_norm": 0.388671875, "learning_rate": 0.001976440686087125, "loss": 0.251, "step": 7936 }, { "epoch": 0.014074730114656389, "grad_norm": 2.828125, "learning_rate": 0.0019764271551229945, "loss": 0.2776, "step": 7938 }, { "epoch": 0.014078276279966205, "grad_norm": 1.3046875, "learning_rate": 0.0019764136203258816, "loss": 0.3305, "step": 7940 }, { "epoch": 0.01408182244527602, "grad_norm": 0.349609375, "learning_rate": 0.0019764000816958442, "loss": 0.2662, "step": 7942 }, { "epoch": 0.014085368610585835, "grad_norm": 2.234375, "learning_rate": 0.001976386539232942, "loss": 0.3798, "step": 7944 }, { "epoch": 0.01408891477589565, "grad_norm": 0.9296875, "learning_rate": 0.001976372992937234, "loss": 0.3456, "step": 7946 }, { "epoch": 0.014092460941205465, "grad_norm": 0.5078125, "learning_rate": 0.0019763594428087792, "loss": 0.2858, "step": 7948 }, { "epoch": 0.014096007106515282, "grad_norm": 0.3984375, "learning_rate": 0.001976345888847638, "loss": 0.3072, "step": 7950 }, { "epoch": 0.014099553271825096, "grad_norm": 0.8671875, "learning_rate": 0.0019763323310538687, "loss": 0.2565, "step": 7952 }, { "epoch": 0.01410309943713491, "grad_norm": 0.6875, "learning_rate": 0.0019763187694275307, "loss": 0.2388, "step": 7954 }, { "epoch": 0.014106645602444727, "grad_norm": 0.271484375, "learning_rate": 0.0019763052039686833, "loss": 0.2174, "step": 7956 }, { "epoch": 0.014110191767754542, "grad_norm": 0.60546875, "learning_rate": 0.001976291634677386, "loss": 0.2882, "step": 7958 }, { "epoch": 0.014113737933064356, "grad_norm": 1.0859375, "learning_rate": 0.001976278061553698, "loss": 0.2814, "step": 7960 }, { "epoch": 0.014117284098374172, "grad_norm": 0.43359375, "learning_rate": 0.0019762644845976794, "loss": 0.2552, "step": 7962 }, { "epoch": 0.014120830263683987, "grad_norm": 0.52734375, "learning_rate": 0.0019762509038093886, "loss": 0.282, "step": 7964 }, { "epoch": 0.014124376428993802, "grad_norm": 3.125, "learning_rate": 0.001976237319188885, "loss": 0.4814, "step": 7966 }, { "epoch": 0.014127922594303618, "grad_norm": 0.275390625, "learning_rate": 0.001976223730736229, "loss": 0.2168, "step": 7968 }, { "epoch": 0.014131468759613432, "grad_norm": 0.396484375, "learning_rate": 0.001976210138451479, "loss": 0.238, "step": 7970 }, { "epoch": 0.014135014924923247, "grad_norm": 0.482421875, "learning_rate": 0.0019761965423346945, "loss": 0.2386, "step": 7972 }, { "epoch": 0.014138561090233063, "grad_norm": 0.73046875, "learning_rate": 0.0019761829423859357, "loss": 0.4044, "step": 7974 }, { "epoch": 0.014142107255542878, "grad_norm": 0.94921875, "learning_rate": 0.0019761693386052613, "loss": 0.2591, "step": 7976 }, { "epoch": 0.014145653420852692, "grad_norm": 0.263671875, "learning_rate": 0.001976155730992731, "loss": 0.3086, "step": 7978 }, { "epoch": 0.014149199586162509, "grad_norm": 1.0859375, "learning_rate": 0.001976142119548405, "loss": 0.3166, "step": 7980 }, { "epoch": 0.014152745751472323, "grad_norm": 0.318359375, "learning_rate": 0.0019761285042723424, "loss": 0.2635, "step": 7982 }, { "epoch": 0.01415629191678214, "grad_norm": 0.3984375, "learning_rate": 0.0019761148851646024, "loss": 0.2934, "step": 7984 }, { "epoch": 0.014159838082091954, "grad_norm": 0.5546875, "learning_rate": 0.0019761012622252446, "loss": 0.2311, "step": 7986 }, { "epoch": 0.014163384247401769, "grad_norm": 0.369140625, "learning_rate": 0.001976087635454329, "loss": 0.2768, "step": 7988 }, { "epoch": 0.014166930412711585, "grad_norm": 1.1328125, "learning_rate": 0.001976074004851915, "loss": 0.3145, "step": 7990 }, { "epoch": 0.0141704765780214, "grad_norm": 0.953125, "learning_rate": 0.001976060370418062, "loss": 0.2376, "step": 7992 }, { "epoch": 0.014174022743331214, "grad_norm": 0.41015625, "learning_rate": 0.00197604673215283, "loss": 0.2532, "step": 7994 }, { "epoch": 0.01417756890864103, "grad_norm": 0.330078125, "learning_rate": 0.0019760330900562783, "loss": 0.2099, "step": 7996 }, { "epoch": 0.014181115073950845, "grad_norm": 0.64453125, "learning_rate": 0.001976019444128467, "loss": 0.3335, "step": 7998 }, { "epoch": 0.01418466123926066, "grad_norm": 0.3359375, "learning_rate": 0.001976005794369455, "loss": 0.2508, "step": 8000 }, { "epoch": 0.014188207404570476, "grad_norm": 0.59375, "learning_rate": 0.001975992140779303, "loss": 0.2161, "step": 8002 }, { "epoch": 0.01419175356988029, "grad_norm": 0.62890625, "learning_rate": 0.00197597848335807, "loss": 0.2582, "step": 8004 }, { "epoch": 0.014195299735190105, "grad_norm": 0.296875, "learning_rate": 0.001975964822105816, "loss": 0.2452, "step": 8006 }, { "epoch": 0.014198845900499921, "grad_norm": 0.32421875, "learning_rate": 0.0019759511570226007, "loss": 0.2437, "step": 8008 }, { "epoch": 0.014202392065809736, "grad_norm": 0.353515625, "learning_rate": 0.001975937488108484, "loss": 0.2512, "step": 8010 }, { "epoch": 0.01420593823111955, "grad_norm": 0.93359375, "learning_rate": 0.001975923815363525, "loss": 0.2971, "step": 8012 }, { "epoch": 0.014209484396429366, "grad_norm": 0.322265625, "learning_rate": 0.0019759101387877846, "loss": 0.2463, "step": 8014 }, { "epoch": 0.014213030561739181, "grad_norm": 3.09375, "learning_rate": 0.0019758964583813216, "loss": 0.3887, "step": 8016 }, { "epoch": 0.014216576727048997, "grad_norm": 0.58984375, "learning_rate": 0.0019758827741441966, "loss": 0.2735, "step": 8018 }, { "epoch": 0.014220122892358812, "grad_norm": 0.392578125, "learning_rate": 0.001975869086076469, "loss": 0.219, "step": 8020 }, { "epoch": 0.014223669057668626, "grad_norm": 1.75, "learning_rate": 0.0019758553941781986, "loss": 0.4032, "step": 8022 }, { "epoch": 0.014227215222978443, "grad_norm": 2.296875, "learning_rate": 0.0019758416984494457, "loss": 0.4561, "step": 8024 }, { "epoch": 0.014230761388288257, "grad_norm": 0.177734375, "learning_rate": 0.0019758279988902703, "loss": 0.2875, "step": 8026 }, { "epoch": 0.014234307553598072, "grad_norm": 0.625, "learning_rate": 0.0019758142955007313, "loss": 0.2761, "step": 8028 }, { "epoch": 0.014237853718907888, "grad_norm": 0.419921875, "learning_rate": 0.0019758005882808895, "loss": 0.3136, "step": 8030 }, { "epoch": 0.014241399884217703, "grad_norm": 0.50390625, "learning_rate": 0.001975786877230805, "loss": 0.2373, "step": 8032 }, { "epoch": 0.014244946049527517, "grad_norm": 0.294921875, "learning_rate": 0.001975773162350537, "loss": 0.2776, "step": 8034 }, { "epoch": 0.014248492214837333, "grad_norm": 2.4375, "learning_rate": 0.001975759443640146, "loss": 0.4026, "step": 8036 }, { "epoch": 0.014252038380147148, "grad_norm": 0.5546875, "learning_rate": 0.0019757457210996918, "loss": 0.2715, "step": 8038 }, { "epoch": 0.014255584545456963, "grad_norm": 0.96875, "learning_rate": 0.001975731994729235, "loss": 0.2807, "step": 8040 }, { "epoch": 0.014259130710766779, "grad_norm": 0.431640625, "learning_rate": 0.0019757182645288346, "loss": 0.2532, "step": 8042 }, { "epoch": 0.014262676876076593, "grad_norm": 0.337890625, "learning_rate": 0.0019757045304985513, "loss": 0.3072, "step": 8044 }, { "epoch": 0.014266223041386408, "grad_norm": 0.43359375, "learning_rate": 0.001975690792638445, "loss": 0.2501, "step": 8046 }, { "epoch": 0.014269769206696224, "grad_norm": 0.47265625, "learning_rate": 0.0019756770509485764, "loss": 0.3048, "step": 8048 }, { "epoch": 0.014273315372006039, "grad_norm": 0.765625, "learning_rate": 0.0019756633054290045, "loss": 0.3172, "step": 8050 }, { "epoch": 0.014276861537315855, "grad_norm": 0.291015625, "learning_rate": 0.00197564955607979, "loss": 0.2048, "step": 8052 }, { "epoch": 0.01428040770262567, "grad_norm": 2.3125, "learning_rate": 0.001975635802900993, "loss": 0.39, "step": 8054 }, { "epoch": 0.014283953867935484, "grad_norm": 0.50390625, "learning_rate": 0.0019756220458926736, "loss": 0.2126, "step": 8056 }, { "epoch": 0.0142875000332453, "grad_norm": 1.140625, "learning_rate": 0.0019756082850548922, "loss": 0.325, "step": 8058 }, { "epoch": 0.014291046198555115, "grad_norm": 0.53515625, "learning_rate": 0.0019755945203877084, "loss": 0.2655, "step": 8060 }, { "epoch": 0.01429459236386493, "grad_norm": 0.359375, "learning_rate": 0.001975580751891183, "loss": 0.2543, "step": 8062 }, { "epoch": 0.014298138529174746, "grad_norm": 0.470703125, "learning_rate": 0.0019755669795653765, "loss": 0.2692, "step": 8064 }, { "epoch": 0.01430168469448456, "grad_norm": 0.51953125, "learning_rate": 0.0019755532034103477, "loss": 0.2488, "step": 8066 }, { "epoch": 0.014305230859794375, "grad_norm": 0.40234375, "learning_rate": 0.001975539423426158, "loss": 0.2301, "step": 8068 }, { "epoch": 0.014308777025104191, "grad_norm": 0.33203125, "learning_rate": 0.001975525639612868, "loss": 0.2632, "step": 8070 }, { "epoch": 0.014312323190414006, "grad_norm": 0.220703125, "learning_rate": 0.001975511851970537, "loss": 0.2281, "step": 8072 }, { "epoch": 0.01431586935572382, "grad_norm": 0.421875, "learning_rate": 0.001975498060499226, "loss": 0.2813, "step": 8074 }, { "epoch": 0.014319415521033637, "grad_norm": 0.265625, "learning_rate": 0.0019754842651989943, "loss": 0.2185, "step": 8076 }, { "epoch": 0.014322961686343451, "grad_norm": 3.625, "learning_rate": 0.0019754704660699036, "loss": 0.3642, "step": 8078 }, { "epoch": 0.014326507851653266, "grad_norm": 0.380859375, "learning_rate": 0.0019754566631120136, "loss": 0.2144, "step": 8080 }, { "epoch": 0.014330054016963082, "grad_norm": 0.302734375, "learning_rate": 0.0019754428563253843, "loss": 0.2512, "step": 8082 }, { "epoch": 0.014333600182272897, "grad_norm": 0.384765625, "learning_rate": 0.001975429045710077, "loss": 0.2246, "step": 8084 }, { "epoch": 0.014337146347582713, "grad_norm": 0.458984375, "learning_rate": 0.001975415231266151, "loss": 0.2908, "step": 8086 }, { "epoch": 0.014340692512892527, "grad_norm": 1.4375, "learning_rate": 0.001975401412993668, "loss": 0.4582, "step": 8088 }, { "epoch": 0.014344238678202342, "grad_norm": 1.0, "learning_rate": 0.001975387590892687, "loss": 0.2912, "step": 8090 }, { "epoch": 0.014347784843512158, "grad_norm": 0.51171875, "learning_rate": 0.0019753737649632697, "loss": 0.2078, "step": 8092 }, { "epoch": 0.014351331008821973, "grad_norm": 0.3828125, "learning_rate": 0.0019753599352054754, "loss": 0.2609, "step": 8094 }, { "epoch": 0.014354877174131787, "grad_norm": 1.5703125, "learning_rate": 0.0019753461016193655, "loss": 0.3123, "step": 8096 }, { "epoch": 0.014358423339441604, "grad_norm": 0.69921875, "learning_rate": 0.0019753322642050005, "loss": 0.2319, "step": 8098 }, { "epoch": 0.014361969504751418, "grad_norm": 0.427734375, "learning_rate": 0.0019753184229624405, "loss": 0.214, "step": 8100 }, { "epoch": 0.014365515670061233, "grad_norm": 0.83203125, "learning_rate": 0.0019753045778917464, "loss": 0.2201, "step": 8102 }, { "epoch": 0.014369061835371049, "grad_norm": 0.30078125, "learning_rate": 0.0019752907289929777, "loss": 0.2869, "step": 8104 }, { "epoch": 0.014372608000680864, "grad_norm": 2.890625, "learning_rate": 0.0019752768762661965, "loss": 0.3829, "step": 8106 }, { "epoch": 0.014376154165990678, "grad_norm": 0.318359375, "learning_rate": 0.001975263019711462, "loss": 0.233, "step": 8108 }, { "epoch": 0.014379700331300494, "grad_norm": 0.6015625, "learning_rate": 0.0019752491593288363, "loss": 0.3708, "step": 8110 }, { "epoch": 0.014383246496610309, "grad_norm": 1.3203125, "learning_rate": 0.0019752352951183786, "loss": 0.4101, "step": 8112 }, { "epoch": 0.014386792661920124, "grad_norm": 2.765625, "learning_rate": 0.0019752214270801504, "loss": 0.2547, "step": 8114 }, { "epoch": 0.01439033882722994, "grad_norm": 0.65625, "learning_rate": 0.001975207555214212, "loss": 0.2971, "step": 8116 }, { "epoch": 0.014393884992539754, "grad_norm": 0.5078125, "learning_rate": 0.0019751936795206243, "loss": 0.2023, "step": 8118 }, { "epoch": 0.01439743115784957, "grad_norm": 0.392578125, "learning_rate": 0.001975179799999448, "loss": 0.2655, "step": 8120 }, { "epoch": 0.014400977323159385, "grad_norm": 0.341796875, "learning_rate": 0.001975165916650743, "loss": 0.4227, "step": 8122 }, { "epoch": 0.0144045234884692, "grad_norm": 0.423828125, "learning_rate": 0.0019751520294745708, "loss": 0.2429, "step": 8124 }, { "epoch": 0.014408069653779016, "grad_norm": 0.5, "learning_rate": 0.001975138138470992, "loss": 0.2966, "step": 8126 }, { "epoch": 0.01441161581908883, "grad_norm": 0.44140625, "learning_rate": 0.0019751242436400673, "loss": 0.3511, "step": 8128 }, { "epoch": 0.014415161984398645, "grad_norm": 0.2578125, "learning_rate": 0.0019751103449818573, "loss": 0.2727, "step": 8130 }, { "epoch": 0.014418708149708461, "grad_norm": 0.365234375, "learning_rate": 0.0019750964424964236, "loss": 0.2762, "step": 8132 }, { "epoch": 0.014422254315018276, "grad_norm": 0.298828125, "learning_rate": 0.001975082536183826, "loss": 0.2352, "step": 8134 }, { "epoch": 0.01442580048032809, "grad_norm": 0.28125, "learning_rate": 0.0019750686260441254, "loss": 0.1915, "step": 8136 }, { "epoch": 0.014429346645637907, "grad_norm": 1.9765625, "learning_rate": 0.0019750547120773836, "loss": 0.312, "step": 8138 }, { "epoch": 0.014432892810947721, "grad_norm": 1.4609375, "learning_rate": 0.0019750407942836605, "loss": 0.5173, "step": 8140 }, { "epoch": 0.014436438976257536, "grad_norm": 0.2890625, "learning_rate": 0.001975026872663017, "loss": 0.2093, "step": 8142 }, { "epoch": 0.014439985141567352, "grad_norm": 0.421875, "learning_rate": 0.0019750129472155143, "loss": 0.2657, "step": 8144 }, { "epoch": 0.014443531306877167, "grad_norm": 0.181640625, "learning_rate": 0.0019749990179412135, "loss": 0.2081, "step": 8146 }, { "epoch": 0.014447077472186981, "grad_norm": 0.6015625, "learning_rate": 0.001974985084840175, "loss": 0.1942, "step": 8148 }, { "epoch": 0.014450623637496798, "grad_norm": 0.50390625, "learning_rate": 0.0019749711479124603, "loss": 0.2726, "step": 8150 }, { "epoch": 0.014454169802806612, "grad_norm": 0.9375, "learning_rate": 0.0019749572071581295, "loss": 0.246, "step": 8152 }, { "epoch": 0.014457715968116428, "grad_norm": 0.39453125, "learning_rate": 0.001974943262577245, "loss": 0.2227, "step": 8154 }, { "epoch": 0.014461262133426243, "grad_norm": 0.35546875, "learning_rate": 0.0019749293141698657, "loss": 0.2184, "step": 8156 }, { "epoch": 0.014464808298736058, "grad_norm": 0.373046875, "learning_rate": 0.0019749153619360547, "loss": 0.2237, "step": 8158 }, { "epoch": 0.014468354464045874, "grad_norm": 1.9375, "learning_rate": 0.0019749014058758722, "loss": 0.3866, "step": 8160 }, { "epoch": 0.014471900629355688, "grad_norm": 0.142578125, "learning_rate": 0.0019748874459893785, "loss": 0.278, "step": 8162 }, { "epoch": 0.014475446794665503, "grad_norm": 0.24609375, "learning_rate": 0.0019748734822766355, "loss": 0.1951, "step": 8164 }, { "epoch": 0.01447899295997532, "grad_norm": 1.671875, "learning_rate": 0.0019748595147377045, "loss": 0.3066, "step": 8166 }, { "epoch": 0.014482539125285134, "grad_norm": 0.75390625, "learning_rate": 0.0019748455433726457, "loss": 0.2686, "step": 8168 }, { "epoch": 0.014486085290594948, "grad_norm": 0.51171875, "learning_rate": 0.0019748315681815207, "loss": 0.312, "step": 8170 }, { "epoch": 0.014489631455904765, "grad_norm": 0.244140625, "learning_rate": 0.001974817589164391, "loss": 0.2182, "step": 8172 }, { "epoch": 0.01449317762121458, "grad_norm": 0.56640625, "learning_rate": 0.001974803606321317, "loss": 0.2493, "step": 8174 }, { "epoch": 0.014496723786524394, "grad_norm": 0.58984375, "learning_rate": 0.0019747896196523605, "loss": 0.4058, "step": 8176 }, { "epoch": 0.01450026995183421, "grad_norm": 0.263671875, "learning_rate": 0.0019747756291575817, "loss": 0.3673, "step": 8178 }, { "epoch": 0.014503816117144025, "grad_norm": 0.8125, "learning_rate": 0.0019747616348370425, "loss": 0.1943, "step": 8180 }, { "epoch": 0.01450736228245384, "grad_norm": 1.7265625, "learning_rate": 0.0019747476366908045, "loss": 0.3337, "step": 8182 }, { "epoch": 0.014510908447763655, "grad_norm": 0.2451171875, "learning_rate": 0.0019747336347189282, "loss": 0.2392, "step": 8184 }, { "epoch": 0.01451445461307347, "grad_norm": 0.67578125, "learning_rate": 0.0019747196289214754, "loss": 0.2143, "step": 8186 }, { "epoch": 0.014518000778383286, "grad_norm": 0.30078125, "learning_rate": 0.001974705619298507, "loss": 0.2118, "step": 8188 }, { "epoch": 0.0145215469436931, "grad_norm": 0.40625, "learning_rate": 0.001974691605850084, "loss": 0.2607, "step": 8190 }, { "epoch": 0.014525093109002915, "grad_norm": 0.44140625, "learning_rate": 0.001974677588576268, "loss": 0.2566, "step": 8192 }, { "epoch": 0.014528639274312732, "grad_norm": 0.41796875, "learning_rate": 0.00197466356747712, "loss": 0.2181, "step": 8194 }, { "epoch": 0.014532185439622546, "grad_norm": 0.234375, "learning_rate": 0.001974649542552702, "loss": 0.2491, "step": 8196 }, { "epoch": 0.01453573160493236, "grad_norm": 1.8984375, "learning_rate": 0.0019746355138030754, "loss": 0.2448, "step": 8198 }, { "epoch": 0.014539277770242177, "grad_norm": 0.984375, "learning_rate": 0.0019746214812283005, "loss": 0.2635, "step": 8200 }, { "epoch": 0.014542823935551992, "grad_norm": 0.345703125, "learning_rate": 0.00197460744482844, "loss": 0.2502, "step": 8202 }, { "epoch": 0.014546370100861806, "grad_norm": 0.421875, "learning_rate": 0.0019745934046035535, "loss": 0.2543, "step": 8204 }, { "epoch": 0.014549916266171622, "grad_norm": 0.7109375, "learning_rate": 0.0019745793605537043, "loss": 0.299, "step": 8206 }, { "epoch": 0.014553462431481437, "grad_norm": 0.43359375, "learning_rate": 0.0019745653126789523, "loss": 0.3053, "step": 8208 }, { "epoch": 0.014557008596791252, "grad_norm": 0.43359375, "learning_rate": 0.0019745512609793603, "loss": 0.4376, "step": 8210 }, { "epoch": 0.014560554762101068, "grad_norm": 0.291015625, "learning_rate": 0.0019745372054549887, "loss": 0.2934, "step": 8212 }, { "epoch": 0.014564100927410882, "grad_norm": 0.267578125, "learning_rate": 0.0019745231461058997, "loss": 0.3122, "step": 8214 }, { "epoch": 0.014567647092720697, "grad_norm": 0.53125, "learning_rate": 0.0019745090829321544, "loss": 0.3878, "step": 8216 }, { "epoch": 0.014571193258030513, "grad_norm": 0.859375, "learning_rate": 0.001974495015933814, "loss": 0.2351, "step": 8218 }, { "epoch": 0.014574739423340328, "grad_norm": 0.32421875, "learning_rate": 0.0019744809451109403, "loss": 0.3699, "step": 8220 }, { "epoch": 0.014578285588650144, "grad_norm": 0.314453125, "learning_rate": 0.001974466870463595, "loss": 0.2613, "step": 8222 }, { "epoch": 0.014581831753959959, "grad_norm": 1.34375, "learning_rate": 0.00197445279199184, "loss": 0.2594, "step": 8224 }, { "epoch": 0.014585377919269773, "grad_norm": 0.6640625, "learning_rate": 0.001974438709695736, "loss": 0.3091, "step": 8226 }, { "epoch": 0.01458892408457959, "grad_norm": 0.330078125, "learning_rate": 0.001974424623575345, "loss": 0.1648, "step": 8228 }, { "epoch": 0.014592470249889404, "grad_norm": 0.306640625, "learning_rate": 0.001974410533630729, "loss": 0.3051, "step": 8230 }, { "epoch": 0.014596016415199219, "grad_norm": 0.3359375, "learning_rate": 0.0019743964398619487, "loss": 0.2379, "step": 8232 }, { "epoch": 0.014599562580509035, "grad_norm": 0.4609375, "learning_rate": 0.0019743823422690666, "loss": 0.2683, "step": 8234 }, { "epoch": 0.01460310874581885, "grad_norm": 0.55859375, "learning_rate": 0.001974368240852144, "loss": 0.2994, "step": 8236 }, { "epoch": 0.014606654911128664, "grad_norm": 0.365234375, "learning_rate": 0.0019743541356112425, "loss": 0.2508, "step": 8238 }, { "epoch": 0.01461020107643848, "grad_norm": 0.3984375, "learning_rate": 0.001974340026546424, "loss": 0.5068, "step": 8240 }, { "epoch": 0.014613747241748295, "grad_norm": 1.8515625, "learning_rate": 0.0019743259136577504, "loss": 0.2735, "step": 8242 }, { "epoch": 0.01461729340705811, "grad_norm": 0.388671875, "learning_rate": 0.001974311796945283, "loss": 0.2804, "step": 8244 }, { "epoch": 0.014620839572367926, "grad_norm": 0.4140625, "learning_rate": 0.0019742976764090835, "loss": 0.2208, "step": 8246 }, { "epoch": 0.01462438573767774, "grad_norm": 0.921875, "learning_rate": 0.001974283552049214, "loss": 0.28, "step": 8248 }, { "epoch": 0.014627931902987555, "grad_norm": 0.29296875, "learning_rate": 0.0019742694238657358, "loss": 0.2355, "step": 8250 }, { "epoch": 0.014631478068297371, "grad_norm": 0.419921875, "learning_rate": 0.001974255291858711, "loss": 0.2687, "step": 8252 }, { "epoch": 0.014635024233607186, "grad_norm": 0.6953125, "learning_rate": 0.0019742411560282015, "loss": 0.2353, "step": 8254 }, { "epoch": 0.014638570398917002, "grad_norm": 0.6953125, "learning_rate": 0.001974227016374269, "loss": 0.2276, "step": 8256 }, { "epoch": 0.014642116564226816, "grad_norm": 1.984375, "learning_rate": 0.001974212872896975, "loss": 0.4946, "step": 8258 }, { "epoch": 0.014645662729536631, "grad_norm": 0.6484375, "learning_rate": 0.0019741987255963824, "loss": 0.2401, "step": 8260 }, { "epoch": 0.014649208894846447, "grad_norm": 0.5546875, "learning_rate": 0.001974184574472552, "loss": 0.2202, "step": 8262 }, { "epoch": 0.014652755060156262, "grad_norm": 1.671875, "learning_rate": 0.0019741704195255457, "loss": 0.4242, "step": 8264 }, { "epoch": 0.014656301225466076, "grad_norm": 0.53125, "learning_rate": 0.0019741562607554263, "loss": 0.2719, "step": 8266 }, { "epoch": 0.014659847390775893, "grad_norm": 1.03125, "learning_rate": 0.0019741420981622547, "loss": 0.2843, "step": 8268 }, { "epoch": 0.014663393556085707, "grad_norm": 0.6171875, "learning_rate": 0.001974127931746094, "loss": 0.21, "step": 8270 }, { "epoch": 0.014666939721395522, "grad_norm": 0.78515625, "learning_rate": 0.001974113761507005, "loss": 0.239, "step": 8272 }, { "epoch": 0.014670485886705338, "grad_norm": 0.3515625, "learning_rate": 0.00197409958744505, "loss": 0.212, "step": 8274 }, { "epoch": 0.014674032052015153, "grad_norm": 0.455078125, "learning_rate": 0.0019740854095602915, "loss": 0.2199, "step": 8276 }, { "epoch": 0.014677578217324967, "grad_norm": 0.9140625, "learning_rate": 0.001974071227852791, "loss": 0.5372, "step": 8278 }, { "epoch": 0.014681124382634783, "grad_norm": 0.58984375, "learning_rate": 0.001974057042322611, "loss": 0.1938, "step": 8280 }, { "epoch": 0.014684670547944598, "grad_norm": 0.62890625, "learning_rate": 0.001974042852969813, "loss": 0.216, "step": 8282 }, { "epoch": 0.014688216713254413, "grad_norm": 0.34375, "learning_rate": 0.001974028659794459, "loss": 0.2577, "step": 8284 }, { "epoch": 0.014691762878564229, "grad_norm": 0.75390625, "learning_rate": 0.0019740144627966114, "loss": 0.2617, "step": 8286 }, { "epoch": 0.014695309043874043, "grad_norm": 0.66015625, "learning_rate": 0.0019740002619763326, "loss": 0.2305, "step": 8288 }, { "epoch": 0.01469885520918386, "grad_norm": 0.359375, "learning_rate": 0.0019739860573336843, "loss": 0.2246, "step": 8290 }, { "epoch": 0.014702401374493674, "grad_norm": 1.4921875, "learning_rate": 0.001973971848868728, "loss": 0.2925, "step": 8292 }, { "epoch": 0.014705947539803489, "grad_norm": 2.09375, "learning_rate": 0.0019739576365815272, "loss": 0.2573, "step": 8294 }, { "epoch": 0.014709493705113305, "grad_norm": 0.69921875, "learning_rate": 0.001973943420472143, "loss": 0.1889, "step": 8296 }, { "epoch": 0.01471303987042312, "grad_norm": 1.2109375, "learning_rate": 0.001973929200540638, "loss": 0.358, "step": 8298 }, { "epoch": 0.014716586035732934, "grad_norm": 0.5078125, "learning_rate": 0.001973914976787074, "loss": 0.2469, "step": 8300 }, { "epoch": 0.01472013220104275, "grad_norm": 0.294921875, "learning_rate": 0.0019739007492115142, "loss": 0.2914, "step": 8302 }, { "epoch": 0.014723678366352565, "grad_norm": 1.046875, "learning_rate": 0.0019738865178140197, "loss": 0.2945, "step": 8304 }, { "epoch": 0.01472722453166238, "grad_norm": 0.2236328125, "learning_rate": 0.001973872282594653, "loss": 0.1866, "step": 8306 }, { "epoch": 0.014730770696972196, "grad_norm": 0.56640625, "learning_rate": 0.001973858043553477, "loss": 0.2336, "step": 8308 }, { "epoch": 0.01473431686228201, "grad_norm": 0.30859375, "learning_rate": 0.0019738438006905537, "loss": 0.3078, "step": 8310 }, { "epoch": 0.014737863027591825, "grad_norm": 0.29296875, "learning_rate": 0.0019738295540059447, "loss": 0.2634, "step": 8312 }, { "epoch": 0.014741409192901641, "grad_norm": 0.65234375, "learning_rate": 0.0019738153034997126, "loss": 0.278, "step": 8314 }, { "epoch": 0.014744955358211456, "grad_norm": 0.279296875, "learning_rate": 0.0019738010491719203, "loss": 0.2528, "step": 8316 }, { "epoch": 0.01474850152352127, "grad_norm": 0.4453125, "learning_rate": 0.0019737867910226297, "loss": 0.2905, "step": 8318 }, { "epoch": 0.014752047688831087, "grad_norm": 0.228515625, "learning_rate": 0.0019737725290519034, "loss": 0.2447, "step": 8320 }, { "epoch": 0.014755593854140901, "grad_norm": 0.470703125, "learning_rate": 0.0019737582632598036, "loss": 0.3212, "step": 8322 }, { "epoch": 0.014759140019450718, "grad_norm": 0.451171875, "learning_rate": 0.0019737439936463926, "loss": 0.2487, "step": 8324 }, { "epoch": 0.014762686184760532, "grad_norm": 0.328125, "learning_rate": 0.001973729720211733, "loss": 0.2462, "step": 8326 }, { "epoch": 0.014766232350070347, "grad_norm": 0.421875, "learning_rate": 0.001973715442955887, "loss": 0.2829, "step": 8328 }, { "epoch": 0.014769778515380163, "grad_norm": 0.396484375, "learning_rate": 0.0019737011618789174, "loss": 0.2193, "step": 8330 }, { "epoch": 0.014773324680689977, "grad_norm": 6.5625, "learning_rate": 0.001973686876980886, "loss": 0.4112, "step": 8332 }, { "epoch": 0.014776870845999792, "grad_norm": 0.28125, "learning_rate": 0.001973672588261856, "loss": 0.3945, "step": 8334 }, { "epoch": 0.014780417011309608, "grad_norm": 0.412109375, "learning_rate": 0.0019736582957218898, "loss": 0.2134, "step": 8336 }, { "epoch": 0.014783963176619423, "grad_norm": 0.291015625, "learning_rate": 0.001973643999361049, "loss": 0.2612, "step": 8338 }, { "epoch": 0.014787509341929237, "grad_norm": 0.36328125, "learning_rate": 0.0019736296991793973, "loss": 0.2279, "step": 8340 }, { "epoch": 0.014791055507239054, "grad_norm": 0.69921875, "learning_rate": 0.0019736153951769968, "loss": 0.2541, "step": 8342 }, { "epoch": 0.014794601672548868, "grad_norm": 0.58203125, "learning_rate": 0.00197360108735391, "loss": 0.1865, "step": 8344 }, { "epoch": 0.014798147837858683, "grad_norm": 0.7890625, "learning_rate": 0.0019735867757101995, "loss": 0.5469, "step": 8346 }, { "epoch": 0.014801694003168499, "grad_norm": 0.8125, "learning_rate": 0.0019735724602459276, "loss": 0.208, "step": 8348 }, { "epoch": 0.014805240168478314, "grad_norm": 0.478515625, "learning_rate": 0.001973558140961157, "loss": 0.2023, "step": 8350 }, { "epoch": 0.014808786333788128, "grad_norm": 0.4375, "learning_rate": 0.001973543817855951, "loss": 0.2787, "step": 8352 }, { "epoch": 0.014812332499097944, "grad_norm": 0.421875, "learning_rate": 0.001973529490930372, "loss": 0.276, "step": 8354 }, { "epoch": 0.014815878664407759, "grad_norm": 0.57421875, "learning_rate": 0.0019735151601844815, "loss": 0.2239, "step": 8356 }, { "epoch": 0.014819424829717575, "grad_norm": 0.87109375, "learning_rate": 0.001973500825618344, "loss": 0.2352, "step": 8358 }, { "epoch": 0.01482297099502739, "grad_norm": 0.78515625, "learning_rate": 0.0019734864872320208, "loss": 0.2223, "step": 8360 }, { "epoch": 0.014826517160337204, "grad_norm": 0.3671875, "learning_rate": 0.0019734721450255753, "loss": 0.2678, "step": 8362 }, { "epoch": 0.01483006332564702, "grad_norm": 0.59375, "learning_rate": 0.0019734577989990697, "loss": 0.2802, "step": 8364 }, { "epoch": 0.014833609490956835, "grad_norm": 0.361328125, "learning_rate": 0.0019734434491525676, "loss": 0.1879, "step": 8366 }, { "epoch": 0.01483715565626665, "grad_norm": 0.3125, "learning_rate": 0.001973429095486131, "loss": 0.3326, "step": 8368 }, { "epoch": 0.014840701821576466, "grad_norm": 0.8125, "learning_rate": 0.0019734147379998224, "loss": 0.3008, "step": 8370 }, { "epoch": 0.01484424798688628, "grad_norm": 0.408203125, "learning_rate": 0.0019734003766937055, "loss": 0.168, "step": 8372 }, { "epoch": 0.014847794152196095, "grad_norm": 0.314453125, "learning_rate": 0.0019733860115678428, "loss": 0.2097, "step": 8374 }, { "epoch": 0.014851340317505912, "grad_norm": 1.234375, "learning_rate": 0.0019733716426222963, "loss": 0.2889, "step": 8376 }, { "epoch": 0.014854886482815726, "grad_norm": 0.60546875, "learning_rate": 0.0019733572698571304, "loss": 0.2187, "step": 8378 }, { "epoch": 0.01485843264812554, "grad_norm": 0.380859375, "learning_rate": 0.001973342893272407, "loss": 0.2115, "step": 8380 }, { "epoch": 0.014861978813435357, "grad_norm": 0.55078125, "learning_rate": 0.0019733285128681888, "loss": 0.2622, "step": 8382 }, { "epoch": 0.014865524978745171, "grad_norm": 1.109375, "learning_rate": 0.001973314128644539, "loss": 0.3458, "step": 8384 }, { "epoch": 0.014869071144054986, "grad_norm": 0.7109375, "learning_rate": 0.00197329974060152, "loss": 0.1967, "step": 8386 }, { "epoch": 0.014872617309364802, "grad_norm": 1.3828125, "learning_rate": 0.001973285348739196, "loss": 0.2537, "step": 8388 }, { "epoch": 0.014876163474674617, "grad_norm": 1.203125, "learning_rate": 0.001973270953057629, "loss": 0.4077, "step": 8390 }, { "epoch": 0.014879709639984433, "grad_norm": 0.5703125, "learning_rate": 0.0019732565535568822, "loss": 0.2731, "step": 8392 }, { "epoch": 0.014883255805294248, "grad_norm": 0.2734375, "learning_rate": 0.0019732421502370178, "loss": 0.2402, "step": 8394 }, { "epoch": 0.014886801970604062, "grad_norm": 0.25390625, "learning_rate": 0.0019732277430981, "loss": 0.2126, "step": 8396 }, { "epoch": 0.014890348135913879, "grad_norm": 0.5703125, "learning_rate": 0.0019732133321401914, "loss": 0.2764, "step": 8398 }, { "epoch": 0.014893894301223693, "grad_norm": 0.265625, "learning_rate": 0.0019731989173633548, "loss": 0.1994, "step": 8400 }, { "epoch": 0.014897440466533508, "grad_norm": 0.7421875, "learning_rate": 0.0019731844987676533, "loss": 0.2315, "step": 8402 }, { "epoch": 0.014900986631843324, "grad_norm": 0.31640625, "learning_rate": 0.00197317007635315, "loss": 0.2386, "step": 8404 }, { "epoch": 0.014904532797153138, "grad_norm": 0.34765625, "learning_rate": 0.001973155650119908, "loss": 0.2045, "step": 8406 }, { "epoch": 0.014908078962462953, "grad_norm": 0.3828125, "learning_rate": 0.00197314122006799, "loss": 0.2094, "step": 8408 }, { "epoch": 0.01491162512777277, "grad_norm": 0.75390625, "learning_rate": 0.0019731267861974604, "loss": 0.3569, "step": 8410 }, { "epoch": 0.014915171293082584, "grad_norm": 0.443359375, "learning_rate": 0.0019731123485083805, "loss": 0.237, "step": 8412 }, { "epoch": 0.014918717458392398, "grad_norm": 0.361328125, "learning_rate": 0.0019730979070008148, "loss": 0.3474, "step": 8414 }, { "epoch": 0.014922263623702215, "grad_norm": 0.74609375, "learning_rate": 0.001973083461674826, "loss": 0.3039, "step": 8416 }, { "epoch": 0.01492580978901203, "grad_norm": 1.203125, "learning_rate": 0.0019730690125304767, "loss": 0.2608, "step": 8418 }, { "epoch": 0.014929355954321844, "grad_norm": 0.7265625, "learning_rate": 0.0019730545595678314, "loss": 0.2046, "step": 8420 }, { "epoch": 0.01493290211963166, "grad_norm": 0.400390625, "learning_rate": 0.0019730401027869523, "loss": 0.2329, "step": 8422 }, { "epoch": 0.014936448284941475, "grad_norm": 0.53515625, "learning_rate": 0.001973025642187903, "loss": 0.2868, "step": 8424 }, { "epoch": 0.014939994450251291, "grad_norm": 0.400390625, "learning_rate": 0.001973011177770747, "loss": 0.2596, "step": 8426 }, { "epoch": 0.014943540615561106, "grad_norm": 1.8984375, "learning_rate": 0.0019729967095355465, "loss": 0.461, "step": 8428 }, { "epoch": 0.01494708678087092, "grad_norm": 4.15625, "learning_rate": 0.0019729822374823657, "loss": 0.3654, "step": 8430 }, { "epoch": 0.014950632946180736, "grad_norm": 0.55859375, "learning_rate": 0.001972967761611268, "loss": 0.4703, "step": 8432 }, { "epoch": 0.014954179111490551, "grad_norm": 0.81640625, "learning_rate": 0.001972953281922316, "loss": 0.2539, "step": 8434 }, { "epoch": 0.014957725276800365, "grad_norm": 0.3671875, "learning_rate": 0.0019729387984155737, "loss": 0.2693, "step": 8436 }, { "epoch": 0.014961271442110182, "grad_norm": 0.69140625, "learning_rate": 0.0019729243110911043, "loss": 0.3123, "step": 8438 }, { "epoch": 0.014964817607419996, "grad_norm": 1.0234375, "learning_rate": 0.001972909819948971, "loss": 0.2053, "step": 8440 }, { "epoch": 0.01496836377272981, "grad_norm": 0.5703125, "learning_rate": 0.0019728953249892366, "loss": 0.2043, "step": 8442 }, { "epoch": 0.014971909938039627, "grad_norm": 2.28125, "learning_rate": 0.0019728808262119654, "loss": 0.3579, "step": 8444 }, { "epoch": 0.014975456103349442, "grad_norm": 0.412109375, "learning_rate": 0.001972866323617221, "loss": 0.2023, "step": 8446 }, { "epoch": 0.014979002268659256, "grad_norm": 3.296875, "learning_rate": 0.0019728518172050656, "loss": 0.5548, "step": 8448 }, { "epoch": 0.014982548433969073, "grad_norm": 0.392578125, "learning_rate": 0.0019728373069755637, "loss": 0.2251, "step": 8450 }, { "epoch": 0.014986094599278887, "grad_norm": 0.8515625, "learning_rate": 0.001972822792928778, "loss": 0.2292, "step": 8452 }, { "epoch": 0.014989640764588702, "grad_norm": 0.69140625, "learning_rate": 0.001972808275064773, "loss": 0.2682, "step": 8454 }, { "epoch": 0.014993186929898518, "grad_norm": 0.80859375, "learning_rate": 0.0019727937533836116, "loss": 0.253, "step": 8456 }, { "epoch": 0.014996733095208332, "grad_norm": 0.5625, "learning_rate": 0.001972779227885357, "loss": 0.2328, "step": 8458 }, { "epoch": 0.015000279260518149, "grad_norm": 0.96875, "learning_rate": 0.001972764698570073, "loss": 0.2995, "step": 8460 }, { "epoch": 0.015003825425827963, "grad_norm": 0.349609375, "learning_rate": 0.0019727501654378233, "loss": 0.3771, "step": 8462 }, { "epoch": 0.015007371591137778, "grad_norm": 0.70703125, "learning_rate": 0.0019727356284886715, "loss": 0.2372, "step": 8464 }, { "epoch": 0.015010917756447594, "grad_norm": 0.5546875, "learning_rate": 0.0019727210877226804, "loss": 0.2373, "step": 8466 }, { "epoch": 0.015014463921757409, "grad_norm": 0.26953125, "learning_rate": 0.001972706543139915, "loss": 0.247, "step": 8468 }, { "epoch": 0.015018010087067223, "grad_norm": 1.4609375, "learning_rate": 0.0019726919947404375, "loss": 0.2755, "step": 8470 }, { "epoch": 0.01502155625237704, "grad_norm": 0.458984375, "learning_rate": 0.0019726774425243123, "loss": 0.1957, "step": 8472 }, { "epoch": 0.015025102417686854, "grad_norm": 0.828125, "learning_rate": 0.001972662886491603, "loss": 0.2928, "step": 8474 }, { "epoch": 0.015028648582996669, "grad_norm": 0.5546875, "learning_rate": 0.0019726483266423733, "loss": 0.1797, "step": 8476 }, { "epoch": 0.015032194748306485, "grad_norm": 0.37109375, "learning_rate": 0.001972633762976686, "loss": 0.2013, "step": 8478 }, { "epoch": 0.0150357409136163, "grad_norm": 0.337890625, "learning_rate": 0.0019726191954946063, "loss": 0.2781, "step": 8480 }, { "epoch": 0.015039287078926114, "grad_norm": 1.6640625, "learning_rate": 0.0019726046241961967, "loss": 0.2612, "step": 8482 }, { "epoch": 0.01504283324423593, "grad_norm": 2.296875, "learning_rate": 0.0019725900490815216, "loss": 0.2983, "step": 8484 }, { "epoch": 0.015046379409545745, "grad_norm": 0.35546875, "learning_rate": 0.0019725754701506444, "loss": 0.186, "step": 8486 }, { "epoch": 0.01504992557485556, "grad_norm": 1.796875, "learning_rate": 0.001972560887403629, "loss": 0.2253, "step": 8488 }, { "epoch": 0.015053471740165376, "grad_norm": 0.2490234375, "learning_rate": 0.0019725463008405386, "loss": 0.2858, "step": 8490 }, { "epoch": 0.01505701790547519, "grad_norm": 0.80078125, "learning_rate": 0.001972531710461438, "loss": 0.2459, "step": 8492 }, { "epoch": 0.015060564070785007, "grad_norm": 0.458984375, "learning_rate": 0.00197251711626639, "loss": 0.2563, "step": 8494 }, { "epoch": 0.015064110236094821, "grad_norm": 0.79296875, "learning_rate": 0.0019725025182554595, "loss": 0.3505, "step": 8496 }, { "epoch": 0.015067656401404636, "grad_norm": 1.6953125, "learning_rate": 0.0019724879164287096, "loss": 0.2718, "step": 8498 }, { "epoch": 0.015071202566714452, "grad_norm": 0.4453125, "learning_rate": 0.0019724733107862047, "loss": 0.3746, "step": 8500 }, { "epoch": 0.015074748732024267, "grad_norm": 0.88671875, "learning_rate": 0.001972458701328008, "loss": 0.2796, "step": 8502 }, { "epoch": 0.015078294897334081, "grad_norm": 0.396484375, "learning_rate": 0.001972444088054184, "loss": 0.1974, "step": 8504 }, { "epoch": 0.015081841062643897, "grad_norm": 1.4765625, "learning_rate": 0.001972429470964796, "loss": 0.259, "step": 8506 }, { "epoch": 0.015085387227953712, "grad_norm": 0.73828125, "learning_rate": 0.0019724148500599083, "loss": 0.3854, "step": 8508 }, { "epoch": 0.015088933393263526, "grad_norm": 0.78515625, "learning_rate": 0.001972400225339585, "loss": 0.2912, "step": 8510 }, { "epoch": 0.015092479558573343, "grad_norm": 2.046875, "learning_rate": 0.00197238559680389, "loss": 0.2514, "step": 8512 }, { "epoch": 0.015096025723883157, "grad_norm": 0.5234375, "learning_rate": 0.0019723709644528867, "loss": 0.2807, "step": 8514 }, { "epoch": 0.015099571889192972, "grad_norm": 0.458984375, "learning_rate": 0.00197235632828664, "loss": 0.251, "step": 8516 }, { "epoch": 0.015103118054502788, "grad_norm": 0.478515625, "learning_rate": 0.001972341688305213, "loss": 0.328, "step": 8518 }, { "epoch": 0.015106664219812603, "grad_norm": 0.5078125, "learning_rate": 0.00197232704450867, "loss": 0.2192, "step": 8520 }, { "epoch": 0.015110210385122417, "grad_norm": 1.5078125, "learning_rate": 0.001972312396897076, "loss": 0.2817, "step": 8522 }, { "epoch": 0.015113756550432234, "grad_norm": 0.8984375, "learning_rate": 0.001972297745470494, "loss": 0.221, "step": 8524 }, { "epoch": 0.015117302715742048, "grad_norm": 0.2734375, "learning_rate": 0.001972283090228988, "loss": 0.1854, "step": 8526 }, { "epoch": 0.015120848881051864, "grad_norm": 0.302734375, "learning_rate": 0.0019722684311726225, "loss": 0.2156, "step": 8528 }, { "epoch": 0.015124395046361679, "grad_norm": 0.45703125, "learning_rate": 0.0019722537683014617, "loss": 0.2754, "step": 8530 }, { "epoch": 0.015127941211671493, "grad_norm": 0.236328125, "learning_rate": 0.0019722391016155695, "loss": 0.2803, "step": 8532 }, { "epoch": 0.01513148737698131, "grad_norm": 0.439453125, "learning_rate": 0.0019722244311150103, "loss": 0.3082, "step": 8534 }, { "epoch": 0.015135033542291124, "grad_norm": 0.404296875, "learning_rate": 0.001972209756799848, "loss": 0.2524, "step": 8536 }, { "epoch": 0.015138579707600939, "grad_norm": 0.64453125, "learning_rate": 0.001972195078670147, "loss": 0.2449, "step": 8538 }, { "epoch": 0.015142125872910755, "grad_norm": 0.44140625, "learning_rate": 0.0019721803967259707, "loss": 0.251, "step": 8540 }, { "epoch": 0.01514567203822057, "grad_norm": 0.326171875, "learning_rate": 0.001972165710967385, "loss": 0.2039, "step": 8542 }, { "epoch": 0.015149218203530384, "grad_norm": 1.1171875, "learning_rate": 0.0019721510213944523, "loss": 0.2846, "step": 8544 }, { "epoch": 0.0151527643688402, "grad_norm": 0.333984375, "learning_rate": 0.001972136328007238, "loss": 0.2438, "step": 8546 }, { "epoch": 0.015156310534150015, "grad_norm": 1.1640625, "learning_rate": 0.001972121630805806, "loss": 0.2857, "step": 8548 }, { "epoch": 0.01515985669945983, "grad_norm": 0.326171875, "learning_rate": 0.0019721069297902205, "loss": 0.1888, "step": 8550 }, { "epoch": 0.015163402864769646, "grad_norm": 2.4375, "learning_rate": 0.001972092224960546, "loss": 0.3156, "step": 8552 }, { "epoch": 0.01516694903007946, "grad_norm": 0.431640625, "learning_rate": 0.001972077516316846, "loss": 0.2829, "step": 8554 }, { "epoch": 0.015170495195389275, "grad_norm": 0.55859375, "learning_rate": 0.0019720628038591864, "loss": 0.2099, "step": 8556 }, { "epoch": 0.015174041360699091, "grad_norm": 0.6484375, "learning_rate": 0.0019720480875876304, "loss": 0.287, "step": 8558 }, { "epoch": 0.015177587526008906, "grad_norm": 0.26953125, "learning_rate": 0.0019720333675022424, "loss": 0.2322, "step": 8560 }, { "epoch": 0.015181133691318722, "grad_norm": 0.474609375, "learning_rate": 0.001972018643603087, "loss": 0.2953, "step": 8562 }, { "epoch": 0.015184679856628537, "grad_norm": 0.22265625, "learning_rate": 0.0019720039158902286, "loss": 0.2044, "step": 8564 }, { "epoch": 0.015188226021938351, "grad_norm": 0.51953125, "learning_rate": 0.0019719891843637317, "loss": 0.2271, "step": 8566 }, { "epoch": 0.015191772187248168, "grad_norm": 1.0, "learning_rate": 0.0019719744490236607, "loss": 0.298, "step": 8568 }, { "epoch": 0.015195318352557982, "grad_norm": 0.412109375, "learning_rate": 0.0019719597098700795, "loss": 0.2512, "step": 8570 }, { "epoch": 0.015198864517867797, "grad_norm": 0.451171875, "learning_rate": 0.001971944966903054, "loss": 0.224, "step": 8572 }, { "epoch": 0.015202410683177613, "grad_norm": 2.53125, "learning_rate": 0.0019719302201226464, "loss": 0.3056, "step": 8574 }, { "epoch": 0.015205956848487428, "grad_norm": 0.435546875, "learning_rate": 0.0019719154695289234, "loss": 0.3421, "step": 8576 }, { "epoch": 0.015209503013797242, "grad_norm": 0.458984375, "learning_rate": 0.001971900715121948, "loss": 0.295, "step": 8578 }, { "epoch": 0.015213049179107058, "grad_norm": 0.494140625, "learning_rate": 0.001971885956901786, "loss": 0.2454, "step": 8580 }, { "epoch": 0.015216595344416873, "grad_norm": 0.4296875, "learning_rate": 0.0019718711948685007, "loss": 0.2786, "step": 8582 }, { "epoch": 0.015220141509726687, "grad_norm": 0.392578125, "learning_rate": 0.0019718564290221578, "loss": 0.2791, "step": 8584 }, { "epoch": 0.015223687675036504, "grad_norm": 0.5234375, "learning_rate": 0.001971841659362821, "loss": 0.2626, "step": 8586 }, { "epoch": 0.015227233840346318, "grad_norm": 0.34375, "learning_rate": 0.0019718268858905557, "loss": 0.1955, "step": 8588 }, { "epoch": 0.015230780005656133, "grad_norm": 0.58203125, "learning_rate": 0.001971812108605425, "loss": 0.2952, "step": 8590 }, { "epoch": 0.01523432617096595, "grad_norm": 0.66796875, "learning_rate": 0.001971797327507495, "loss": 0.2773, "step": 8592 }, { "epoch": 0.015237872336275764, "grad_norm": 0.5390625, "learning_rate": 0.0019717825425968304, "loss": 0.2793, "step": 8594 }, { "epoch": 0.01524141850158558, "grad_norm": 0.90625, "learning_rate": 0.001971767753873495, "loss": 0.2537, "step": 8596 }, { "epoch": 0.015244964666895395, "grad_norm": 0.474609375, "learning_rate": 0.0019717529613375536, "loss": 0.2958, "step": 8598 }, { "epoch": 0.015248510832205209, "grad_norm": 1.7578125, "learning_rate": 0.0019717381649890712, "loss": 0.3074, "step": 8600 }, { "epoch": 0.015252056997515025, "grad_norm": 0.2890625, "learning_rate": 0.0019717233648281125, "loss": 0.3694, "step": 8602 }, { "epoch": 0.01525560316282484, "grad_norm": 0.4296875, "learning_rate": 0.0019717085608547424, "loss": 0.2201, "step": 8604 }, { "epoch": 0.015259149328134654, "grad_norm": 0.265625, "learning_rate": 0.001971693753069025, "loss": 0.2749, "step": 8606 }, { "epoch": 0.01526269549344447, "grad_norm": 0.58203125, "learning_rate": 0.001971678941471026, "loss": 0.2294, "step": 8608 }, { "epoch": 0.015266241658754285, "grad_norm": 0.59375, "learning_rate": 0.0019716641260608086, "loss": 0.2263, "step": 8610 }, { "epoch": 0.0152697878240641, "grad_norm": 0.412109375, "learning_rate": 0.0019716493068384394, "loss": 0.2387, "step": 8612 }, { "epoch": 0.015273333989373916, "grad_norm": 0.326171875, "learning_rate": 0.0019716344838039824, "loss": 0.2777, "step": 8614 }, { "epoch": 0.01527688015468373, "grad_norm": 4.28125, "learning_rate": 0.0019716196569575027, "loss": 0.3567, "step": 8616 }, { "epoch": 0.015280426319993545, "grad_norm": 1.0234375, "learning_rate": 0.001971604826299065, "loss": 0.3221, "step": 8618 }, { "epoch": 0.015283972485303362, "grad_norm": 0.4296875, "learning_rate": 0.001971589991828733, "loss": 0.1985, "step": 8620 }, { "epoch": 0.015287518650613176, "grad_norm": 0.86328125, "learning_rate": 0.001971575153546574, "loss": 0.2859, "step": 8622 }, { "epoch": 0.01529106481592299, "grad_norm": 0.490234375, "learning_rate": 0.001971560311452651, "loss": 0.254, "step": 8624 }, { "epoch": 0.015294610981232807, "grad_norm": 1.828125, "learning_rate": 0.001971545465547029, "loss": 0.343, "step": 8626 }, { "epoch": 0.015298157146542622, "grad_norm": 0.49609375, "learning_rate": 0.001971530615829774, "loss": 0.3985, "step": 8628 }, { "epoch": 0.015301703311852438, "grad_norm": 0.3125, "learning_rate": 0.0019715157623009503, "loss": 0.2491, "step": 8630 }, { "epoch": 0.015305249477162252, "grad_norm": 0.353515625, "learning_rate": 0.0019715009049606227, "loss": 0.2225, "step": 8632 }, { "epoch": 0.015308795642472067, "grad_norm": 0.353515625, "learning_rate": 0.001971486043808856, "loss": 0.2721, "step": 8634 }, { "epoch": 0.015312341807781883, "grad_norm": 0.9375, "learning_rate": 0.001971471178845716, "loss": 0.2457, "step": 8636 }, { "epoch": 0.015315887973091698, "grad_norm": 0.369140625, "learning_rate": 0.001971456310071267, "loss": 0.2807, "step": 8638 }, { "epoch": 0.015319434138401512, "grad_norm": 0.439453125, "learning_rate": 0.001971441437485575, "loss": 0.2984, "step": 8640 }, { "epoch": 0.015322980303711329, "grad_norm": 0.296875, "learning_rate": 0.001971426561088704, "loss": 0.246, "step": 8642 }, { "epoch": 0.015326526469021143, "grad_norm": 0.5625, "learning_rate": 0.001971411680880719, "loss": 0.2864, "step": 8644 }, { "epoch": 0.015330072634330958, "grad_norm": 1.734375, "learning_rate": 0.001971396796861686, "loss": 0.2798, "step": 8646 }, { "epoch": 0.015333618799640774, "grad_norm": 0.4765625, "learning_rate": 0.0019713819090316693, "loss": 0.2598, "step": 8648 }, { "epoch": 0.015337164964950589, "grad_norm": 0.326171875, "learning_rate": 0.001971367017390734, "loss": 0.2185, "step": 8650 }, { "epoch": 0.015340711130260403, "grad_norm": 0.2265625, "learning_rate": 0.001971352121938946, "loss": 0.2564, "step": 8652 }, { "epoch": 0.01534425729557022, "grad_norm": 0.26953125, "learning_rate": 0.00197133722267637, "loss": 0.2426, "step": 8654 }, { "epoch": 0.015347803460880034, "grad_norm": 0.384765625, "learning_rate": 0.0019713223196030707, "loss": 0.2428, "step": 8656 }, { "epoch": 0.015351349626189848, "grad_norm": 1.2578125, "learning_rate": 0.001971307412719114, "loss": 0.5143, "step": 8658 }, { "epoch": 0.015354895791499665, "grad_norm": 0.6640625, "learning_rate": 0.0019712925020245646, "loss": 0.2882, "step": 8660 }, { "epoch": 0.01535844195680948, "grad_norm": 0.294921875, "learning_rate": 0.001971277587519488, "loss": 0.255, "step": 8662 }, { "epoch": 0.015361988122119296, "grad_norm": 0.53125, "learning_rate": 0.0019712626692039493, "loss": 0.2761, "step": 8664 }, { "epoch": 0.01536553428742911, "grad_norm": 0.314453125, "learning_rate": 0.001971247747078014, "loss": 0.2327, "step": 8666 }, { "epoch": 0.015369080452738925, "grad_norm": 0.345703125, "learning_rate": 0.001971232821141747, "loss": 0.2087, "step": 8668 }, { "epoch": 0.015372626618048741, "grad_norm": 0.36328125, "learning_rate": 0.0019712178913952137, "loss": 0.2459, "step": 8670 }, { "epoch": 0.015376172783358556, "grad_norm": 0.2236328125, "learning_rate": 0.0019712029578384796, "loss": 0.1898, "step": 8672 }, { "epoch": 0.01537971894866837, "grad_norm": 0.365234375, "learning_rate": 0.0019711880204716092, "loss": 0.2762, "step": 8674 }, { "epoch": 0.015383265113978186, "grad_norm": 0.416015625, "learning_rate": 0.001971173079294669, "loss": 0.3124, "step": 8676 }, { "epoch": 0.015386811279288001, "grad_norm": 0.50390625, "learning_rate": 0.0019711581343077236, "loss": 0.2829, "step": 8678 }, { "epoch": 0.015390357444597816, "grad_norm": 0.67578125, "learning_rate": 0.0019711431855108387, "loss": 0.2668, "step": 8680 }, { "epoch": 0.015393903609907632, "grad_norm": 0.8984375, "learning_rate": 0.001971128232904079, "loss": 0.2707, "step": 8682 }, { "epoch": 0.015397449775217446, "grad_norm": 1.8984375, "learning_rate": 0.001971113276487511, "loss": 0.3612, "step": 8684 }, { "epoch": 0.015400995940527261, "grad_norm": 0.76171875, "learning_rate": 0.001971098316261199, "loss": 0.2707, "step": 8686 }, { "epoch": 0.015404542105837077, "grad_norm": 0.3828125, "learning_rate": 0.0019710833522252097, "loss": 0.2478, "step": 8688 }, { "epoch": 0.015408088271146892, "grad_norm": 0.6953125, "learning_rate": 0.0019710683843796074, "loss": 0.2291, "step": 8690 }, { "epoch": 0.015411634436456706, "grad_norm": 0.703125, "learning_rate": 0.0019710534127244583, "loss": 0.2109, "step": 8692 }, { "epoch": 0.015415180601766523, "grad_norm": 0.47265625, "learning_rate": 0.001971038437259827, "loss": 0.231, "step": 8694 }, { "epoch": 0.015418726767076337, "grad_norm": 1.1640625, "learning_rate": 0.0019710234579857796, "loss": 0.3979, "step": 8696 }, { "epoch": 0.015422272932386153, "grad_norm": 0.640625, "learning_rate": 0.001971008474902382, "loss": 0.2348, "step": 8698 }, { "epoch": 0.015425819097695968, "grad_norm": 0.310546875, "learning_rate": 0.0019709934880096985, "loss": 0.2488, "step": 8700 }, { "epoch": 0.015429365263005783, "grad_norm": 0.26953125, "learning_rate": 0.001970978497307796, "loss": 0.2801, "step": 8702 }, { "epoch": 0.015432911428315599, "grad_norm": 0.462890625, "learning_rate": 0.0019709635027967396, "loss": 0.2071, "step": 8704 }, { "epoch": 0.015436457593625413, "grad_norm": 0.625, "learning_rate": 0.0019709485044765943, "loss": 0.2892, "step": 8706 }, { "epoch": 0.015440003758935228, "grad_norm": 0.28125, "learning_rate": 0.0019709335023474265, "loss": 0.1877, "step": 8708 }, { "epoch": 0.015443549924245044, "grad_norm": 1.875, "learning_rate": 0.0019709184964093016, "loss": 0.4012, "step": 8710 }, { "epoch": 0.015447096089554859, "grad_norm": 0.7421875, "learning_rate": 0.0019709034866622847, "loss": 0.2192, "step": 8712 }, { "epoch": 0.015450642254864673, "grad_norm": 1.3984375, "learning_rate": 0.0019708884731064417, "loss": 0.4603, "step": 8714 }, { "epoch": 0.01545418842017449, "grad_norm": 0.2373046875, "learning_rate": 0.0019708734557418385, "loss": 0.2656, "step": 8716 }, { "epoch": 0.015457734585484304, "grad_norm": 0.5234375, "learning_rate": 0.001970858434568541, "loss": 0.2632, "step": 8718 }, { "epoch": 0.015461280750794119, "grad_norm": 0.59375, "learning_rate": 0.0019708434095866145, "loss": 0.213, "step": 8720 }, { "epoch": 0.015464826916103935, "grad_norm": 1.3203125, "learning_rate": 0.0019708283807961247, "loss": 0.2399, "step": 8722 }, { "epoch": 0.01546837308141375, "grad_norm": 0.453125, "learning_rate": 0.001970813348197137, "loss": 0.6017, "step": 8724 }, { "epoch": 0.015471919246723564, "grad_norm": 0.73828125, "learning_rate": 0.0019707983117897183, "loss": 0.2586, "step": 8726 }, { "epoch": 0.01547546541203338, "grad_norm": 0.8359375, "learning_rate": 0.0019707832715739333, "loss": 0.4973, "step": 8728 }, { "epoch": 0.015479011577343195, "grad_norm": 1.2265625, "learning_rate": 0.001970768227549848, "loss": 0.3071, "step": 8730 }, { "epoch": 0.015482557742653011, "grad_norm": 0.412109375, "learning_rate": 0.001970753179717528, "loss": 0.2667, "step": 8732 }, { "epoch": 0.015486103907962826, "grad_norm": 0.443359375, "learning_rate": 0.0019707381280770395, "loss": 0.2113, "step": 8734 }, { "epoch": 0.01548965007327264, "grad_norm": 0.9453125, "learning_rate": 0.0019707230726284486, "loss": 0.2516, "step": 8736 }, { "epoch": 0.015493196238582457, "grad_norm": 0.287109375, "learning_rate": 0.0019707080133718204, "loss": 0.2966, "step": 8738 }, { "epoch": 0.015496742403892271, "grad_norm": 0.458984375, "learning_rate": 0.0019706929503072214, "loss": 0.2895, "step": 8740 }, { "epoch": 0.015500288569202086, "grad_norm": 0.369140625, "learning_rate": 0.001970677883434717, "loss": 0.2919, "step": 8742 }, { "epoch": 0.015503834734511902, "grad_norm": 1.828125, "learning_rate": 0.001970662812754373, "loss": 0.365, "step": 8744 }, { "epoch": 0.015507380899821717, "grad_norm": 0.6015625, "learning_rate": 0.001970647738266256, "loss": 0.1899, "step": 8746 }, { "epoch": 0.015510927065131531, "grad_norm": 0.7734375, "learning_rate": 0.0019706326599704315, "loss": 0.253, "step": 8748 }, { "epoch": 0.015514473230441347, "grad_norm": 0.33984375, "learning_rate": 0.0019706175778669652, "loss": 0.1949, "step": 8750 }, { "epoch": 0.015518019395751162, "grad_norm": 0.48046875, "learning_rate": 0.0019706024919559236, "loss": 0.2643, "step": 8752 }, { "epoch": 0.015521565561060977, "grad_norm": 0.484375, "learning_rate": 0.001970587402237372, "loss": 0.2585, "step": 8754 }, { "epoch": 0.015525111726370793, "grad_norm": 1.0, "learning_rate": 0.0019705723087113775, "loss": 0.3023, "step": 8756 }, { "epoch": 0.015528657891680607, "grad_norm": 0.37109375, "learning_rate": 0.001970557211378005, "loss": 0.2703, "step": 8758 }, { "epoch": 0.015532204056990422, "grad_norm": 0.359375, "learning_rate": 0.001970542110237321, "loss": 0.3066, "step": 8760 }, { "epoch": 0.015535750222300238, "grad_norm": 2.828125, "learning_rate": 0.0019705270052893914, "loss": 0.3463, "step": 8762 }, { "epoch": 0.015539296387610053, "grad_norm": 1.2734375, "learning_rate": 0.0019705118965342825, "loss": 0.3042, "step": 8764 }, { "epoch": 0.015542842552919869, "grad_norm": 2.359375, "learning_rate": 0.00197049678397206, "loss": 0.3152, "step": 8766 }, { "epoch": 0.015546388718229684, "grad_norm": 1.6640625, "learning_rate": 0.0019704816676027904, "loss": 0.4442, "step": 8768 }, { "epoch": 0.015549934883539498, "grad_norm": 3.171875, "learning_rate": 0.001970466547426539, "loss": 0.45, "step": 8770 }, { "epoch": 0.015553481048849314, "grad_norm": 1.703125, "learning_rate": 0.0019704514234433735, "loss": 0.2297, "step": 8772 }, { "epoch": 0.015557027214159129, "grad_norm": 1.28125, "learning_rate": 0.001970436295653359, "loss": 0.2423, "step": 8774 }, { "epoch": 0.015560573379468944, "grad_norm": 1.734375, "learning_rate": 0.0019704211640565613, "loss": 0.2454, "step": 8776 }, { "epoch": 0.01556411954477876, "grad_norm": 1.0234375, "learning_rate": 0.001970406028653047, "loss": 0.2548, "step": 8778 }, { "epoch": 0.015567665710088574, "grad_norm": 0.2490234375, "learning_rate": 0.0019703908894428824, "loss": 0.3037, "step": 8780 }, { "epoch": 0.015571211875398389, "grad_norm": 1.15625, "learning_rate": 0.001970375746426134, "loss": 0.2951, "step": 8782 }, { "epoch": 0.015574758040708205, "grad_norm": 0.40234375, "learning_rate": 0.001970360599602867, "loss": 0.2847, "step": 8784 }, { "epoch": 0.01557830420601802, "grad_norm": 0.431640625, "learning_rate": 0.001970345448973149, "loss": 0.1947, "step": 8786 }, { "epoch": 0.015581850371327834, "grad_norm": 0.42578125, "learning_rate": 0.001970330294537045, "loss": 0.2164, "step": 8788 }, { "epoch": 0.01558539653663765, "grad_norm": 0.6875, "learning_rate": 0.001970315136294622, "loss": 0.4597, "step": 8790 }, { "epoch": 0.015588942701947465, "grad_norm": 0.47265625, "learning_rate": 0.0019702999742459465, "loss": 0.2569, "step": 8792 }, { "epoch": 0.01559248886725728, "grad_norm": 1.8125, "learning_rate": 0.001970284808391084, "loss": 0.3938, "step": 8794 }, { "epoch": 0.015596035032567096, "grad_norm": 0.287109375, "learning_rate": 0.0019702696387301015, "loss": 0.1909, "step": 8796 }, { "epoch": 0.01559958119787691, "grad_norm": 1.015625, "learning_rate": 0.0019702544652630653, "loss": 0.2431, "step": 8798 }, { "epoch": 0.015603127363186727, "grad_norm": 0.40234375, "learning_rate": 0.001970239287990041, "loss": 0.2457, "step": 8800 }, { "epoch": 0.015606673528496541, "grad_norm": 1.4140625, "learning_rate": 0.001970224106911096, "loss": 0.2552, "step": 8802 }, { "epoch": 0.015610219693806356, "grad_norm": 0.4765625, "learning_rate": 0.001970208922026296, "loss": 0.2023, "step": 8804 }, { "epoch": 0.015613765859116172, "grad_norm": 0.52734375, "learning_rate": 0.001970193733335708, "loss": 0.2378, "step": 8806 }, { "epoch": 0.015617312024425987, "grad_norm": 0.55859375, "learning_rate": 0.001970178540839398, "loss": 0.3996, "step": 8808 }, { "epoch": 0.015620858189735801, "grad_norm": 0.50390625, "learning_rate": 0.0019701633445374325, "loss": 0.1683, "step": 8810 }, { "epoch": 0.015624404355045618, "grad_norm": 1.1875, "learning_rate": 0.0019701481444298775, "loss": 0.3738, "step": 8812 }, { "epoch": 0.01562795052035543, "grad_norm": 0.73828125, "learning_rate": 0.0019701329405168, "loss": 0.521, "step": 8814 }, { "epoch": 0.01563149668566525, "grad_norm": 0.7265625, "learning_rate": 0.001970117732798267, "loss": 0.4277, "step": 8816 }, { "epoch": 0.015635042850975063, "grad_norm": 0.314453125, "learning_rate": 0.001970102521274344, "loss": 0.2433, "step": 8818 }, { "epoch": 0.015638589016284878, "grad_norm": 0.322265625, "learning_rate": 0.0019700873059450984, "loss": 0.2382, "step": 8820 }, { "epoch": 0.015642135181594692, "grad_norm": 0.2431640625, "learning_rate": 0.0019700720868105963, "loss": 0.2892, "step": 8822 }, { "epoch": 0.015645681346904507, "grad_norm": 0.392578125, "learning_rate": 0.001970056863870904, "loss": 0.2265, "step": 8824 }, { "epoch": 0.015649227512214325, "grad_norm": 0.357421875, "learning_rate": 0.0019700416371260885, "loss": 0.2546, "step": 8826 }, { "epoch": 0.01565277367752414, "grad_norm": 2.359375, "learning_rate": 0.001970026406576216, "loss": 0.3298, "step": 8828 }, { "epoch": 0.015656319842833954, "grad_norm": 0.42578125, "learning_rate": 0.0019700111722213537, "loss": 0.304, "step": 8830 }, { "epoch": 0.01565986600814377, "grad_norm": 0.62890625, "learning_rate": 0.001969995934061568, "loss": 0.2647, "step": 8832 }, { "epoch": 0.015663412173453583, "grad_norm": 0.291015625, "learning_rate": 0.0019699806920969254, "loss": 0.2051, "step": 8834 }, { "epoch": 0.015666958338763397, "grad_norm": 1.1171875, "learning_rate": 0.0019699654463274925, "loss": 0.3798, "step": 8836 }, { "epoch": 0.015670504504073215, "grad_norm": 3.078125, "learning_rate": 0.0019699501967533357, "loss": 0.2796, "step": 8838 }, { "epoch": 0.01567405066938303, "grad_norm": 3.953125, "learning_rate": 0.0019699349433745226, "loss": 0.3843, "step": 8840 }, { "epoch": 0.015677596834692845, "grad_norm": 0.447265625, "learning_rate": 0.001969919686191119, "loss": 0.3353, "step": 8842 }, { "epoch": 0.01568114300000266, "grad_norm": 0.58984375, "learning_rate": 0.0019699044252031923, "loss": 0.193, "step": 8844 }, { "epoch": 0.015684689165312474, "grad_norm": 0.77734375, "learning_rate": 0.001969889160410809, "loss": 0.4398, "step": 8846 }, { "epoch": 0.015688235330622288, "grad_norm": 0.546875, "learning_rate": 0.0019698738918140354, "loss": 0.2342, "step": 8848 }, { "epoch": 0.015691781495932106, "grad_norm": 0.84765625, "learning_rate": 0.001969858619412939, "loss": 0.3275, "step": 8850 }, { "epoch": 0.01569532766124192, "grad_norm": 0.3828125, "learning_rate": 0.001969843343207586, "loss": 0.2707, "step": 8852 }, { "epoch": 0.015698873826551735, "grad_norm": 0.75390625, "learning_rate": 0.001969828063198044, "loss": 0.2012, "step": 8854 }, { "epoch": 0.01570241999186155, "grad_norm": 0.54296875, "learning_rate": 0.0019698127793843787, "loss": 0.2342, "step": 8856 }, { "epoch": 0.015705966157171364, "grad_norm": 0.6875, "learning_rate": 0.001969797491766658, "loss": 0.2103, "step": 8858 }, { "epoch": 0.015709512322481183, "grad_norm": 0.5546875, "learning_rate": 0.001969782200344948, "loss": 0.2587, "step": 8860 }, { "epoch": 0.015713058487790997, "grad_norm": 0.421875, "learning_rate": 0.0019697669051193163, "loss": 0.1839, "step": 8862 }, { "epoch": 0.01571660465310081, "grad_norm": 0.419921875, "learning_rate": 0.001969751606089829, "loss": 0.2522, "step": 8864 }, { "epoch": 0.015720150818410626, "grad_norm": 0.60546875, "learning_rate": 0.0019697363032565533, "loss": 0.3043, "step": 8866 }, { "epoch": 0.01572369698372044, "grad_norm": 1.171875, "learning_rate": 0.0019697209966195563, "loss": 0.3253, "step": 8868 }, { "epoch": 0.015727243149030255, "grad_norm": 0.376953125, "learning_rate": 0.001969705686178905, "loss": 0.3684, "step": 8870 }, { "epoch": 0.015730789314340073, "grad_norm": 0.2197265625, "learning_rate": 0.001969690371934666, "loss": 0.2541, "step": 8872 }, { "epoch": 0.015734335479649888, "grad_norm": 0.365234375, "learning_rate": 0.001969675053886907, "loss": 0.2577, "step": 8874 }, { "epoch": 0.015737881644959702, "grad_norm": 0.70703125, "learning_rate": 0.001969659732035694, "loss": 0.3146, "step": 8876 }, { "epoch": 0.015741427810269517, "grad_norm": 0.427734375, "learning_rate": 0.0019696444063810946, "loss": 0.2843, "step": 8878 }, { "epoch": 0.01574497397557933, "grad_norm": 0.365234375, "learning_rate": 0.0019696290769231754, "loss": 0.1831, "step": 8880 }, { "epoch": 0.015748520140889146, "grad_norm": 1.5234375, "learning_rate": 0.001969613743662004, "loss": 0.2378, "step": 8882 }, { "epoch": 0.015752066306198964, "grad_norm": 1.84375, "learning_rate": 0.0019695984065976474, "loss": 0.4394, "step": 8884 }, { "epoch": 0.01575561247150878, "grad_norm": 0.6015625, "learning_rate": 0.0019695830657301726, "loss": 0.2568, "step": 8886 }, { "epoch": 0.015759158636818593, "grad_norm": 0.55859375, "learning_rate": 0.0019695677210596466, "loss": 0.226, "step": 8888 }, { "epoch": 0.015762704802128408, "grad_norm": 0.2373046875, "learning_rate": 0.0019695523725861363, "loss": 0.1982, "step": 8890 }, { "epoch": 0.015766250967438222, "grad_norm": 0.35546875, "learning_rate": 0.0019695370203097087, "loss": 0.431, "step": 8892 }, { "epoch": 0.01576979713274804, "grad_norm": 0.37890625, "learning_rate": 0.0019695216642304316, "loss": 0.2, "step": 8894 }, { "epoch": 0.015773343298057855, "grad_norm": 0.83984375, "learning_rate": 0.001969506304348372, "loss": 0.2671, "step": 8896 }, { "epoch": 0.01577688946336767, "grad_norm": 0.404296875, "learning_rate": 0.0019694909406635965, "loss": 0.3697, "step": 8898 }, { "epoch": 0.015780435628677484, "grad_norm": 0.318359375, "learning_rate": 0.001969475573176173, "loss": 0.2599, "step": 8900 }, { "epoch": 0.0157839817939873, "grad_norm": 0.447265625, "learning_rate": 0.001969460201886168, "loss": 0.2766, "step": 8902 }, { "epoch": 0.015787527959297113, "grad_norm": 0.486328125, "learning_rate": 0.0019694448267936495, "loss": 0.2613, "step": 8904 }, { "epoch": 0.01579107412460693, "grad_norm": 0.37890625, "learning_rate": 0.0019694294478986843, "loss": 0.2224, "step": 8906 }, { "epoch": 0.015794620289916746, "grad_norm": 0.70703125, "learning_rate": 0.0019694140652013396, "loss": 0.2584, "step": 8908 }, { "epoch": 0.01579816645522656, "grad_norm": 0.66796875, "learning_rate": 0.0019693986787016828, "loss": 0.2002, "step": 8910 }, { "epoch": 0.015801712620536375, "grad_norm": 0.318359375, "learning_rate": 0.0019693832883997814, "loss": 0.1634, "step": 8912 }, { "epoch": 0.01580525878584619, "grad_norm": 0.76171875, "learning_rate": 0.0019693678942957022, "loss": 0.2095, "step": 8914 }, { "epoch": 0.015808804951156004, "grad_norm": 0.56640625, "learning_rate": 0.001969352496389513, "loss": 0.2213, "step": 8916 }, { "epoch": 0.015812351116465822, "grad_norm": 0.28125, "learning_rate": 0.001969337094681281, "loss": 0.183, "step": 8918 }, { "epoch": 0.015815897281775636, "grad_norm": 0.416015625, "learning_rate": 0.001969321689171073, "loss": 0.2282, "step": 8920 }, { "epoch": 0.01581944344708545, "grad_norm": 0.73046875, "learning_rate": 0.0019693062798589577, "loss": 0.3324, "step": 8922 }, { "epoch": 0.015822989612395266, "grad_norm": 0.5078125, "learning_rate": 0.0019692908667450016, "loss": 0.1827, "step": 8924 }, { "epoch": 0.01582653577770508, "grad_norm": 0.494140625, "learning_rate": 0.0019692754498292714, "loss": 0.2467, "step": 8926 }, { "epoch": 0.015830081943014898, "grad_norm": 0.671875, "learning_rate": 0.001969260029111836, "loss": 0.2548, "step": 8928 }, { "epoch": 0.015833628108324713, "grad_norm": 0.890625, "learning_rate": 0.001969244604592762, "loss": 0.2927, "step": 8930 }, { "epoch": 0.015837174273634527, "grad_norm": 0.23046875, "learning_rate": 0.001969229176272117, "loss": 0.2603, "step": 8932 }, { "epoch": 0.015840720438944342, "grad_norm": 0.2119140625, "learning_rate": 0.0019692137441499682, "loss": 0.2697, "step": 8934 }, { "epoch": 0.015844266604254156, "grad_norm": 0.419921875, "learning_rate": 0.0019691983082263838, "loss": 0.2379, "step": 8936 }, { "epoch": 0.01584781276956397, "grad_norm": 0.8203125, "learning_rate": 0.001969182868501431, "loss": 0.2241, "step": 8938 }, { "epoch": 0.01585135893487379, "grad_norm": 0.376953125, "learning_rate": 0.0019691674249751765, "loss": 0.2037, "step": 8940 }, { "epoch": 0.015854905100183603, "grad_norm": 0.63671875, "learning_rate": 0.0019691519776476895, "loss": 0.2639, "step": 8942 }, { "epoch": 0.015858451265493418, "grad_norm": 0.33984375, "learning_rate": 0.0019691365265190356, "loss": 0.191, "step": 8944 }, { "epoch": 0.015861997430803233, "grad_norm": 0.421875, "learning_rate": 0.001969121071589284, "loss": 0.2542, "step": 8946 }, { "epoch": 0.015865543596113047, "grad_norm": 0.73046875, "learning_rate": 0.001969105612858502, "loss": 0.2814, "step": 8948 }, { "epoch": 0.01586908976142286, "grad_norm": 0.765625, "learning_rate": 0.0019690901503267564, "loss": 0.2825, "step": 8950 }, { "epoch": 0.01587263592673268, "grad_norm": 0.76953125, "learning_rate": 0.001969074683994115, "loss": 0.3389, "step": 8952 }, { "epoch": 0.015876182092042494, "grad_norm": 0.40234375, "learning_rate": 0.0019690592138606466, "loss": 0.2813, "step": 8954 }, { "epoch": 0.01587972825735231, "grad_norm": 0.33203125, "learning_rate": 0.001969043739926417, "loss": 0.2227, "step": 8956 }, { "epoch": 0.015883274422662123, "grad_norm": 0.3203125, "learning_rate": 0.0019690282621914958, "loss": 0.3896, "step": 8958 }, { "epoch": 0.015886820587971938, "grad_norm": 2.5, "learning_rate": 0.0019690127806559496, "loss": 0.3129, "step": 8960 }, { "epoch": 0.015890366753281756, "grad_norm": 0.458984375, "learning_rate": 0.001968997295319846, "loss": 0.2696, "step": 8962 }, { "epoch": 0.01589391291859157, "grad_norm": 0.65625, "learning_rate": 0.0019689818061832533, "loss": 0.2344, "step": 8964 }, { "epoch": 0.015897459083901385, "grad_norm": 0.40234375, "learning_rate": 0.0019689663132462384, "loss": 0.2639, "step": 8966 }, { "epoch": 0.0159010052492112, "grad_norm": 0.98046875, "learning_rate": 0.00196895081650887, "loss": 0.259, "step": 8968 }, { "epoch": 0.015904551414521014, "grad_norm": 0.6640625, "learning_rate": 0.0019689353159712156, "loss": 0.1795, "step": 8970 }, { "epoch": 0.01590809757983083, "grad_norm": 0.671875, "learning_rate": 0.0019689198116333425, "loss": 0.2184, "step": 8972 }, { "epoch": 0.015911643745140647, "grad_norm": 1.03125, "learning_rate": 0.001968904303495319, "loss": 0.2089, "step": 8974 }, { "epoch": 0.01591518991045046, "grad_norm": 0.73046875, "learning_rate": 0.0019688887915572132, "loss": 0.2451, "step": 8976 }, { "epoch": 0.015918736075760276, "grad_norm": 0.4375, "learning_rate": 0.001968873275819092, "loss": 0.2544, "step": 8978 }, { "epoch": 0.01592228224107009, "grad_norm": 6.65625, "learning_rate": 0.001968857756281024, "loss": 0.4497, "step": 8980 }, { "epoch": 0.015925828406379905, "grad_norm": 0.546875, "learning_rate": 0.001968842232943077, "loss": 0.1765, "step": 8982 }, { "epoch": 0.01592937457168972, "grad_norm": 0.359375, "learning_rate": 0.0019688267058053186, "loss": 0.2274, "step": 8984 }, { "epoch": 0.015932920736999538, "grad_norm": 0.9296875, "learning_rate": 0.0019688111748678164, "loss": 0.2432, "step": 8986 }, { "epoch": 0.015936466902309352, "grad_norm": 4.28125, "learning_rate": 0.0019687956401306396, "loss": 0.2341, "step": 8988 }, { "epoch": 0.015940013067619167, "grad_norm": 4.1875, "learning_rate": 0.0019687801015938547, "loss": 0.3296, "step": 8990 }, { "epoch": 0.01594355923292898, "grad_norm": 0.470703125, "learning_rate": 0.001968764559257531, "loss": 0.213, "step": 8992 }, { "epoch": 0.015947105398238796, "grad_norm": 0.87890625, "learning_rate": 0.001968749013121735, "loss": 0.2537, "step": 8994 }, { "epoch": 0.015950651563548614, "grad_norm": 0.41796875, "learning_rate": 0.001968733463186536, "loss": 0.21, "step": 8996 }, { "epoch": 0.01595419772885843, "grad_norm": 0.400390625, "learning_rate": 0.001968717909452001, "loss": 0.2433, "step": 8998 }, { "epoch": 0.015957743894168243, "grad_norm": 0.515625, "learning_rate": 0.0019687023519181987, "loss": 0.3763, "step": 9000 }, { "epoch": 0.015961290059478057, "grad_norm": 0.6015625, "learning_rate": 0.001968686790585197, "loss": 0.2646, "step": 9002 }, { "epoch": 0.015964836224787872, "grad_norm": 0.59765625, "learning_rate": 0.0019686712254530637, "loss": 0.2571, "step": 9004 }, { "epoch": 0.015968382390097687, "grad_norm": 2.140625, "learning_rate": 0.001968655656521867, "loss": 0.4143, "step": 9006 }, { "epoch": 0.015971928555407505, "grad_norm": 0.28515625, "learning_rate": 0.0019686400837916752, "loss": 0.2215, "step": 9008 }, { "epoch": 0.01597547472071732, "grad_norm": 0.41015625, "learning_rate": 0.001968624507262556, "loss": 0.2064, "step": 9010 }, { "epoch": 0.015979020886027134, "grad_norm": 0.51953125, "learning_rate": 0.0019686089269345787, "loss": 0.2513, "step": 9012 }, { "epoch": 0.015982567051336948, "grad_norm": 0.498046875, "learning_rate": 0.0019685933428078097, "loss": 0.276, "step": 9014 }, { "epoch": 0.015986113216646763, "grad_norm": 0.37109375, "learning_rate": 0.0019685777548823177, "loss": 0.1765, "step": 9016 }, { "epoch": 0.015989659381956577, "grad_norm": 0.30859375, "learning_rate": 0.0019685621631581715, "loss": 0.1829, "step": 9018 }, { "epoch": 0.015993205547266395, "grad_norm": 2.296875, "learning_rate": 0.001968546567635439, "loss": 0.3764, "step": 9020 }, { "epoch": 0.01599675171257621, "grad_norm": 0.65234375, "learning_rate": 0.001968530968314188, "loss": 0.2513, "step": 9022 }, { "epoch": 0.016000297877886024, "grad_norm": 0.5, "learning_rate": 0.001968515365194487, "loss": 0.3134, "step": 9024 }, { "epoch": 0.01600384404319584, "grad_norm": 0.291015625, "learning_rate": 0.0019684997582764046, "loss": 0.1954, "step": 9026 }, { "epoch": 0.016007390208505654, "grad_norm": 0.29296875, "learning_rate": 0.001968484147560009, "loss": 0.1838, "step": 9028 }, { "epoch": 0.01601093637381547, "grad_norm": 0.498046875, "learning_rate": 0.001968468533045368, "loss": 0.347, "step": 9030 }, { "epoch": 0.016014482539125286, "grad_norm": 0.390625, "learning_rate": 0.0019684529147325496, "loss": 0.1951, "step": 9032 }, { "epoch": 0.0160180287044351, "grad_norm": 0.375, "learning_rate": 0.001968437292621623, "loss": 0.419, "step": 9034 }, { "epoch": 0.016021574869744915, "grad_norm": 2.125, "learning_rate": 0.0019684216667126566, "loss": 0.2516, "step": 9036 }, { "epoch": 0.01602512103505473, "grad_norm": 0.2373046875, "learning_rate": 0.0019684060370057177, "loss": 0.2799, "step": 9038 }, { "epoch": 0.016028667200364544, "grad_norm": 0.419921875, "learning_rate": 0.001968390403500875, "loss": 0.2165, "step": 9040 }, { "epoch": 0.016032213365674362, "grad_norm": 0.3046875, "learning_rate": 0.0019683747661981975, "loss": 0.2137, "step": 9042 }, { "epoch": 0.016035759530984177, "grad_norm": 0.7578125, "learning_rate": 0.001968359125097753, "loss": 0.3507, "step": 9044 }, { "epoch": 0.01603930569629399, "grad_norm": 0.462890625, "learning_rate": 0.00196834348019961, "loss": 0.2477, "step": 9046 }, { "epoch": 0.016042851861603806, "grad_norm": 0.7734375, "learning_rate": 0.0019683278315038378, "loss": 0.2222, "step": 9048 }, { "epoch": 0.01604639802691362, "grad_norm": 0.3671875, "learning_rate": 0.0019683121790105033, "loss": 0.2, "step": 9050 }, { "epoch": 0.016049944192223435, "grad_norm": 0.75390625, "learning_rate": 0.0019682965227196757, "loss": 0.2866, "step": 9052 }, { "epoch": 0.016053490357533253, "grad_norm": 0.2353515625, "learning_rate": 0.0019682808626314235, "loss": 0.2183, "step": 9054 }, { "epoch": 0.016057036522843068, "grad_norm": 1.75, "learning_rate": 0.0019682651987458157, "loss": 0.2317, "step": 9056 }, { "epoch": 0.016060582688152882, "grad_norm": 0.953125, "learning_rate": 0.00196824953106292, "loss": 0.2602, "step": 9058 }, { "epoch": 0.016064128853462697, "grad_norm": 0.4765625, "learning_rate": 0.0019682338595828045, "loss": 0.2026, "step": 9060 }, { "epoch": 0.01606767501877251, "grad_norm": 0.306640625, "learning_rate": 0.0019682181843055395, "loss": 0.2367, "step": 9062 }, { "epoch": 0.01607122118408233, "grad_norm": 1.203125, "learning_rate": 0.0019682025052311916, "loss": 0.3423, "step": 9064 }, { "epoch": 0.016074767349392144, "grad_norm": 0.365234375, "learning_rate": 0.001968186822359831, "loss": 0.2734, "step": 9066 }, { "epoch": 0.01607831351470196, "grad_norm": 0.55859375, "learning_rate": 0.001968171135691525, "loss": 0.1992, "step": 9068 }, { "epoch": 0.016081859680011773, "grad_norm": 0.9140625, "learning_rate": 0.001968155445226343, "loss": 0.3029, "step": 9070 }, { "epoch": 0.016085405845321588, "grad_norm": 0.578125, "learning_rate": 0.001968139750964353, "loss": 0.2207, "step": 9072 }, { "epoch": 0.016088952010631402, "grad_norm": 0.6640625, "learning_rate": 0.0019681240529056247, "loss": 0.3986, "step": 9074 }, { "epoch": 0.01609249817594122, "grad_norm": 0.267578125, "learning_rate": 0.0019681083510502254, "loss": 0.5211, "step": 9076 }, { "epoch": 0.016096044341251035, "grad_norm": 0.65625, "learning_rate": 0.0019680926453982243, "loss": 0.2527, "step": 9078 }, { "epoch": 0.01609959050656085, "grad_norm": 0.2734375, "learning_rate": 0.001968076935949691, "loss": 0.204, "step": 9080 }, { "epoch": 0.016103136671870664, "grad_norm": 1.6484375, "learning_rate": 0.001968061222704693, "loss": 0.3984, "step": 9082 }, { "epoch": 0.01610668283718048, "grad_norm": 0.52734375, "learning_rate": 0.0019680455056632993, "loss": 0.2289, "step": 9084 }, { "epoch": 0.016110229002490293, "grad_norm": 2.1875, "learning_rate": 0.001968029784825579, "loss": 0.3513, "step": 9086 }, { "epoch": 0.01611377516780011, "grad_norm": 1.2890625, "learning_rate": 0.0019680140601916005, "loss": 0.2703, "step": 9088 }, { "epoch": 0.016117321333109925, "grad_norm": 0.37890625, "learning_rate": 0.001967998331761433, "loss": 0.229, "step": 9090 }, { "epoch": 0.01612086749841974, "grad_norm": 0.6484375, "learning_rate": 0.0019679825995351447, "loss": 0.3204, "step": 9092 }, { "epoch": 0.016124413663729555, "grad_norm": 0.36328125, "learning_rate": 0.001967966863512805, "loss": 0.224, "step": 9094 }, { "epoch": 0.01612795982903937, "grad_norm": 0.57421875, "learning_rate": 0.0019679511236944816, "loss": 0.2061, "step": 9096 }, { "epoch": 0.016131505994349187, "grad_norm": 0.48828125, "learning_rate": 0.001967935380080245, "loss": 0.2472, "step": 9098 }, { "epoch": 0.016135052159659, "grad_norm": 0.466796875, "learning_rate": 0.0019679196326701626, "loss": 0.2555, "step": 9100 }, { "epoch": 0.016138598324968816, "grad_norm": 0.474609375, "learning_rate": 0.0019679038814643043, "loss": 0.1926, "step": 9102 }, { "epoch": 0.01614214449027863, "grad_norm": 0.38671875, "learning_rate": 0.001967888126462739, "loss": 0.2364, "step": 9104 }, { "epoch": 0.016145690655588445, "grad_norm": 0.35546875, "learning_rate": 0.001967872367665534, "loss": 0.2426, "step": 9106 }, { "epoch": 0.01614923682089826, "grad_norm": 0.90625, "learning_rate": 0.0019678566050727606, "loss": 0.2446, "step": 9108 }, { "epoch": 0.016152782986208078, "grad_norm": 0.455078125, "learning_rate": 0.001967840838684486, "loss": 0.2825, "step": 9110 }, { "epoch": 0.016156329151517893, "grad_norm": 0.515625, "learning_rate": 0.001967825068500779, "loss": 0.2445, "step": 9112 }, { "epoch": 0.016159875316827707, "grad_norm": 1.3046875, "learning_rate": 0.00196780929452171, "loss": 0.2415, "step": 9114 }, { "epoch": 0.01616342148213752, "grad_norm": 0.58203125, "learning_rate": 0.001967793516747347, "loss": 0.3694, "step": 9116 }, { "epoch": 0.016166967647447336, "grad_norm": 0.359375, "learning_rate": 0.0019677777351777594, "loss": 0.1842, "step": 9118 }, { "epoch": 0.01617051381275715, "grad_norm": 0.419921875, "learning_rate": 0.001967761949813016, "loss": 0.3075, "step": 9120 }, { "epoch": 0.01617405997806697, "grad_norm": 0.7734375, "learning_rate": 0.001967746160653186, "loss": 0.2139, "step": 9122 }, { "epoch": 0.016177606143376783, "grad_norm": 0.62890625, "learning_rate": 0.001967730367698338, "loss": 0.281, "step": 9124 }, { "epoch": 0.016181152308686598, "grad_norm": 0.78515625, "learning_rate": 0.0019677145709485417, "loss": 0.2777, "step": 9126 }, { "epoch": 0.016184698473996412, "grad_norm": 0.376953125, "learning_rate": 0.0019676987704038656, "loss": 0.2851, "step": 9128 }, { "epoch": 0.016188244639306227, "grad_norm": 0.61328125, "learning_rate": 0.001967682966064379, "loss": 0.2513, "step": 9130 }, { "epoch": 0.016191790804616045, "grad_norm": 0.50390625, "learning_rate": 0.001967667157930152, "loss": 0.2483, "step": 9132 }, { "epoch": 0.01619533696992586, "grad_norm": 0.88671875, "learning_rate": 0.001967651346001252, "loss": 0.2562, "step": 9134 }, { "epoch": 0.016198883135235674, "grad_norm": 0.85546875, "learning_rate": 0.0019676355302777494, "loss": 0.4384, "step": 9136 }, { "epoch": 0.01620242930054549, "grad_norm": 0.4140625, "learning_rate": 0.001967619710759713, "loss": 0.26, "step": 9138 }, { "epoch": 0.016205975465855303, "grad_norm": 0.365234375, "learning_rate": 0.0019676038874472117, "loss": 0.264, "step": 9140 }, { "epoch": 0.016209521631165118, "grad_norm": 0.6796875, "learning_rate": 0.001967588060340315, "loss": 0.3224, "step": 9142 }, { "epoch": 0.016213067796474936, "grad_norm": 0.248046875, "learning_rate": 0.0019675722294390918, "loss": 0.2915, "step": 9144 }, { "epoch": 0.01621661396178475, "grad_norm": 0.59765625, "learning_rate": 0.0019675563947436124, "loss": 0.2036, "step": 9146 }, { "epoch": 0.016220160127094565, "grad_norm": 1.3125, "learning_rate": 0.0019675405562539446, "loss": 0.2886, "step": 9148 }, { "epoch": 0.01622370629240438, "grad_norm": 0.8671875, "learning_rate": 0.0019675247139701586, "loss": 0.2406, "step": 9150 }, { "epoch": 0.016227252457714194, "grad_norm": 0.58984375, "learning_rate": 0.0019675088678923233, "loss": 0.2157, "step": 9152 }, { "epoch": 0.01623079862302401, "grad_norm": 1.4296875, "learning_rate": 0.001967493018020508, "loss": 0.2927, "step": 9154 }, { "epoch": 0.016234344788333827, "grad_norm": 0.64453125, "learning_rate": 0.001967477164354783, "loss": 0.2413, "step": 9156 }, { "epoch": 0.01623789095364364, "grad_norm": 0.77734375, "learning_rate": 0.0019674613068952156, "loss": 0.2759, "step": 9158 }, { "epoch": 0.016241437118953456, "grad_norm": 0.451171875, "learning_rate": 0.001967445445641877, "loss": 0.2807, "step": 9160 }, { "epoch": 0.01624498328426327, "grad_norm": 0.3984375, "learning_rate": 0.0019674295805948354, "loss": 0.2622, "step": 9162 }, { "epoch": 0.016248529449573085, "grad_norm": 2.921875, "learning_rate": 0.001967413711754161, "loss": 0.2902, "step": 9164 }, { "epoch": 0.016252075614882903, "grad_norm": 0.45703125, "learning_rate": 0.001967397839119923, "loss": 0.2736, "step": 9166 }, { "epoch": 0.016255621780192717, "grad_norm": 1.7265625, "learning_rate": 0.0019673819626921905, "loss": 0.3685, "step": 9168 }, { "epoch": 0.016259167945502532, "grad_norm": 0.57421875, "learning_rate": 0.0019673660824710334, "loss": 0.1939, "step": 9170 }, { "epoch": 0.016262714110812346, "grad_norm": 1.7265625, "learning_rate": 0.0019673501984565204, "loss": 0.3006, "step": 9172 }, { "epoch": 0.01626626027612216, "grad_norm": 1.0625, "learning_rate": 0.001967334310648722, "loss": 0.2636, "step": 9174 }, { "epoch": 0.016269806441431976, "grad_norm": 0.33203125, "learning_rate": 0.0019673184190477067, "loss": 0.3767, "step": 9176 }, { "epoch": 0.016273352606741794, "grad_norm": 1.171875, "learning_rate": 0.0019673025236535448, "loss": 0.2422, "step": 9178 }, { "epoch": 0.016276898772051608, "grad_norm": 0.45703125, "learning_rate": 0.0019672866244663054, "loss": 0.1636, "step": 9180 }, { "epoch": 0.016280444937361423, "grad_norm": 0.32421875, "learning_rate": 0.0019672707214860577, "loss": 0.2745, "step": 9182 }, { "epoch": 0.016283991102671237, "grad_norm": 1.2734375, "learning_rate": 0.001967254814712872, "loss": 0.2408, "step": 9184 }, { "epoch": 0.016287537267981052, "grad_norm": 0.412109375, "learning_rate": 0.0019672389041468174, "loss": 0.2722, "step": 9186 }, { "epoch": 0.016291083433290866, "grad_norm": 0.83984375, "learning_rate": 0.0019672229897879636, "loss": 0.2187, "step": 9188 }, { "epoch": 0.016294629598600684, "grad_norm": 0.40234375, "learning_rate": 0.00196720707163638, "loss": 0.3051, "step": 9190 }, { "epoch": 0.0162981757639105, "grad_norm": 0.3671875, "learning_rate": 0.0019671911496921364, "loss": 0.2302, "step": 9192 }, { "epoch": 0.016301721929220313, "grad_norm": 0.46484375, "learning_rate": 0.0019671752239553025, "loss": 0.3003, "step": 9194 }, { "epoch": 0.016305268094530128, "grad_norm": 1.2265625, "learning_rate": 0.001967159294425948, "loss": 0.3314, "step": 9196 }, { "epoch": 0.016308814259839943, "grad_norm": 1.03125, "learning_rate": 0.0019671433611041424, "loss": 0.2185, "step": 9198 }, { "epoch": 0.01631236042514976, "grad_norm": 0.494140625, "learning_rate": 0.0019671274239899555, "loss": 0.2651, "step": 9200 }, { "epoch": 0.016315906590459575, "grad_norm": 0.52734375, "learning_rate": 0.001967111483083457, "loss": 0.2236, "step": 9202 }, { "epoch": 0.01631945275576939, "grad_norm": 0.87890625, "learning_rate": 0.001967095538384716, "loss": 0.2779, "step": 9204 }, { "epoch": 0.016322998921079204, "grad_norm": 0.39453125, "learning_rate": 0.0019670795898938036, "loss": 0.3884, "step": 9206 }, { "epoch": 0.01632654508638902, "grad_norm": 2.5, "learning_rate": 0.0019670636376107886, "loss": 0.2762, "step": 9208 }, { "epoch": 0.016330091251698833, "grad_norm": 0.53125, "learning_rate": 0.0019670476815357406, "loss": 0.2428, "step": 9210 }, { "epoch": 0.01633363741700865, "grad_norm": 0.38671875, "learning_rate": 0.0019670317216687297, "loss": 0.2458, "step": 9212 }, { "epoch": 0.016337183582318466, "grad_norm": 0.384765625, "learning_rate": 0.0019670157580098254, "loss": 0.2737, "step": 9214 }, { "epoch": 0.01634072974762828, "grad_norm": 2.3125, "learning_rate": 0.0019669997905590983, "loss": 0.2491, "step": 9216 }, { "epoch": 0.016344275912938095, "grad_norm": 1.2421875, "learning_rate": 0.0019669838193166174, "loss": 0.2095, "step": 9218 }, { "epoch": 0.01634782207824791, "grad_norm": 0.369140625, "learning_rate": 0.0019669678442824534, "loss": 0.2204, "step": 9220 }, { "epoch": 0.016351368243557724, "grad_norm": 0.3046875, "learning_rate": 0.0019669518654566753, "loss": 0.258, "step": 9222 }, { "epoch": 0.016354914408867542, "grad_norm": 0.357421875, "learning_rate": 0.0019669358828393532, "loss": 0.1901, "step": 9224 }, { "epoch": 0.016358460574177357, "grad_norm": 0.302734375, "learning_rate": 0.001966919896430557, "loss": 0.2416, "step": 9226 }, { "epoch": 0.01636200673948717, "grad_norm": 2.5625, "learning_rate": 0.001966903906230357, "loss": 0.3124, "step": 9228 }, { "epoch": 0.016365552904796986, "grad_norm": 0.29296875, "learning_rate": 0.001966887912238823, "loss": 0.5054, "step": 9230 }, { "epoch": 0.0163690990701068, "grad_norm": 0.443359375, "learning_rate": 0.0019668719144560246, "loss": 0.2961, "step": 9232 }, { "epoch": 0.01637264523541662, "grad_norm": 0.703125, "learning_rate": 0.0019668559128820317, "loss": 0.2606, "step": 9234 }, { "epoch": 0.016376191400726433, "grad_norm": 0.80078125, "learning_rate": 0.001966839907516915, "loss": 0.2821, "step": 9236 }, { "epoch": 0.016379737566036248, "grad_norm": 0.62109375, "learning_rate": 0.0019668238983607442, "loss": 0.2284, "step": 9238 }, { "epoch": 0.016383283731346062, "grad_norm": 3.140625, "learning_rate": 0.001966807885413589, "loss": 0.2976, "step": 9240 }, { "epoch": 0.016386829896655877, "grad_norm": 1.546875, "learning_rate": 0.0019667918686755194, "loss": 0.3194, "step": 9242 }, { "epoch": 0.01639037606196569, "grad_norm": 0.92578125, "learning_rate": 0.001966775848146606, "loss": 0.1863, "step": 9244 }, { "epoch": 0.01639392222727551, "grad_norm": 0.279296875, "learning_rate": 0.0019667598238269184, "loss": 0.1665, "step": 9246 }, { "epoch": 0.016397468392585324, "grad_norm": 0.490234375, "learning_rate": 0.0019667437957165265, "loss": 0.2047, "step": 9248 }, { "epoch": 0.01640101455789514, "grad_norm": 0.62109375, "learning_rate": 0.001966727763815501, "loss": 0.3, "step": 9250 }, { "epoch": 0.016404560723204953, "grad_norm": 1.6953125, "learning_rate": 0.0019667117281239118, "loss": 0.3029, "step": 9252 }, { "epoch": 0.016408106888514767, "grad_norm": 0.84375, "learning_rate": 0.001966695688641829, "loss": 0.3038, "step": 9254 }, { "epoch": 0.016411653053824582, "grad_norm": 0.75390625, "learning_rate": 0.0019666796453693225, "loss": 0.2118, "step": 9256 }, { "epoch": 0.0164151992191344, "grad_norm": 0.8046875, "learning_rate": 0.0019666635983064625, "loss": 0.2733, "step": 9258 }, { "epoch": 0.016418745384444215, "grad_norm": 0.443359375, "learning_rate": 0.0019666475474533194, "loss": 0.1609, "step": 9260 }, { "epoch": 0.01642229154975403, "grad_norm": 0.353515625, "learning_rate": 0.0019666314928099638, "loss": 0.2794, "step": 9262 }, { "epoch": 0.016425837715063844, "grad_norm": 0.94140625, "learning_rate": 0.001966615434376465, "loss": 0.2715, "step": 9264 }, { "epoch": 0.016429383880373658, "grad_norm": 0.45703125, "learning_rate": 0.0019665993721528934, "loss": 0.2159, "step": 9266 }, { "epoch": 0.016432930045683476, "grad_norm": 0.8359375, "learning_rate": 0.00196658330613932, "loss": 0.2003, "step": 9268 }, { "epoch": 0.01643647621099329, "grad_norm": 0.353515625, "learning_rate": 0.0019665672363358144, "loss": 0.1921, "step": 9270 }, { "epoch": 0.016440022376303105, "grad_norm": 0.55859375, "learning_rate": 0.0019665511627424475, "loss": 0.2469, "step": 9272 }, { "epoch": 0.01644356854161292, "grad_norm": 0.3203125, "learning_rate": 0.001966535085359289, "loss": 0.2532, "step": 9274 }, { "epoch": 0.016447114706922734, "grad_norm": 0.65234375, "learning_rate": 0.001966519004186409, "loss": 0.2338, "step": 9276 }, { "epoch": 0.01645066087223255, "grad_norm": 0.396484375, "learning_rate": 0.001966502919223878, "loss": 0.2488, "step": 9278 }, { "epoch": 0.016454207037542367, "grad_norm": 0.53515625, "learning_rate": 0.0019664868304717672, "loss": 0.1875, "step": 9280 }, { "epoch": 0.01645775320285218, "grad_norm": 0.79296875, "learning_rate": 0.001966470737930146, "loss": 0.2337, "step": 9282 }, { "epoch": 0.016461299368161996, "grad_norm": 1.4453125, "learning_rate": 0.0019664546415990854, "loss": 0.3458, "step": 9284 }, { "epoch": 0.01646484553347181, "grad_norm": 0.56640625, "learning_rate": 0.0019664385414786554, "loss": 0.2514, "step": 9286 }, { "epoch": 0.016468391698781625, "grad_norm": 0.369140625, "learning_rate": 0.001966422437568926, "loss": 0.2683, "step": 9288 }, { "epoch": 0.01647193786409144, "grad_norm": 0.421875, "learning_rate": 0.001966406329869969, "loss": 0.2173, "step": 9290 }, { "epoch": 0.016475484029401258, "grad_norm": 0.447265625, "learning_rate": 0.001966390218381853, "loss": 0.2834, "step": 9292 }, { "epoch": 0.016479030194711072, "grad_norm": 1.390625, "learning_rate": 0.00196637410310465, "loss": 0.2814, "step": 9294 }, { "epoch": 0.016482576360020887, "grad_norm": 0.8125, "learning_rate": 0.0019663579840384303, "loss": 0.2642, "step": 9296 }, { "epoch": 0.0164861225253307, "grad_norm": 0.53125, "learning_rate": 0.0019663418611832635, "loss": 0.2865, "step": 9298 }, { "epoch": 0.016489668690640516, "grad_norm": 0.419921875, "learning_rate": 0.001966325734539221, "loss": 0.212, "step": 9300 }, { "epoch": 0.016493214855950334, "grad_norm": 0.6953125, "learning_rate": 0.001966309604106372, "loss": 0.3237, "step": 9302 }, { "epoch": 0.01649676102126015, "grad_norm": 0.36328125, "learning_rate": 0.001966293469884789, "loss": 0.2307, "step": 9304 }, { "epoch": 0.016500307186569963, "grad_norm": 0.42578125, "learning_rate": 0.001966277331874541, "loss": 0.2543, "step": 9306 }, { "epoch": 0.016503853351879778, "grad_norm": 0.82421875, "learning_rate": 0.0019662611900756995, "loss": 0.2334, "step": 9308 }, { "epoch": 0.016507399517189592, "grad_norm": 0.9609375, "learning_rate": 0.001966245044488335, "loss": 0.2651, "step": 9310 }, { "epoch": 0.016510945682499407, "grad_norm": 0.412109375, "learning_rate": 0.0019662288951125175, "loss": 0.1734, "step": 9312 }, { "epoch": 0.016514491847809225, "grad_norm": 0.359375, "learning_rate": 0.0019662127419483176, "loss": 0.2603, "step": 9314 }, { "epoch": 0.01651803801311904, "grad_norm": 0.59375, "learning_rate": 0.001966196584995807, "loss": 0.2612, "step": 9316 }, { "epoch": 0.016521584178428854, "grad_norm": 0.375, "learning_rate": 0.0019661804242550552, "loss": 0.2759, "step": 9318 }, { "epoch": 0.01652513034373867, "grad_norm": 0.6484375, "learning_rate": 0.0019661642597261337, "loss": 0.2949, "step": 9320 }, { "epoch": 0.016528676509048483, "grad_norm": 1.2265625, "learning_rate": 0.0019661480914091125, "loss": 0.2587, "step": 9322 }, { "epoch": 0.016532222674358298, "grad_norm": 0.6875, "learning_rate": 0.001966131919304063, "loss": 0.282, "step": 9324 }, { "epoch": 0.016535768839668116, "grad_norm": 0.3828125, "learning_rate": 0.0019661157434110554, "loss": 0.2139, "step": 9326 }, { "epoch": 0.01653931500497793, "grad_norm": 0.259765625, "learning_rate": 0.0019660995637301604, "loss": 0.3594, "step": 9328 }, { "epoch": 0.016542861170287745, "grad_norm": 0.328125, "learning_rate": 0.0019660833802614495, "loss": 0.2163, "step": 9330 }, { "epoch": 0.01654640733559756, "grad_norm": 0.373046875, "learning_rate": 0.001966067193004993, "loss": 0.2012, "step": 9332 }, { "epoch": 0.016549953500907374, "grad_norm": 0.423828125, "learning_rate": 0.001966051001960861, "loss": 0.2219, "step": 9334 }, { "epoch": 0.016553499666217192, "grad_norm": 0.6875, "learning_rate": 0.0019660348071291254, "loss": 0.2263, "step": 9336 }, { "epoch": 0.016557045831527006, "grad_norm": 0.28515625, "learning_rate": 0.001966018608509857, "loss": 0.2248, "step": 9338 }, { "epoch": 0.01656059199683682, "grad_norm": 0.609375, "learning_rate": 0.001966002406103126, "loss": 0.2741, "step": 9340 }, { "epoch": 0.016564138162146635, "grad_norm": 0.255859375, "learning_rate": 0.0019659861999090033, "loss": 0.2063, "step": 9342 }, { "epoch": 0.01656768432745645, "grad_norm": 0.40234375, "learning_rate": 0.00196596998992756, "loss": 0.2669, "step": 9344 }, { "epoch": 0.016571230492766265, "grad_norm": 0.75, "learning_rate": 0.001965953776158867, "loss": 0.2563, "step": 9346 }, { "epoch": 0.016574776658076083, "grad_norm": 0.404296875, "learning_rate": 0.001965937558602995, "loss": 0.4689, "step": 9348 }, { "epoch": 0.016578322823385897, "grad_norm": 0.380859375, "learning_rate": 0.0019659213372600157, "loss": 0.2028, "step": 9350 }, { "epoch": 0.01658186898869571, "grad_norm": 0.435546875, "learning_rate": 0.001965905112129999, "loss": 0.2586, "step": 9352 }, { "epoch": 0.016585415154005526, "grad_norm": 0.3671875, "learning_rate": 0.0019658888832130164, "loss": 0.2505, "step": 9354 }, { "epoch": 0.01658896131931534, "grad_norm": 0.30859375, "learning_rate": 0.0019658726505091388, "loss": 0.2558, "step": 9356 }, { "epoch": 0.016592507484625155, "grad_norm": 3.703125, "learning_rate": 0.0019658564140184374, "loss": 0.2322, "step": 9358 }, { "epoch": 0.016596053649934973, "grad_norm": 0.34765625, "learning_rate": 0.0019658401737409825, "loss": 0.2673, "step": 9360 }, { "epoch": 0.016599599815244788, "grad_norm": 0.84375, "learning_rate": 0.001965823929676846, "loss": 0.2613, "step": 9362 }, { "epoch": 0.016603145980554603, "grad_norm": 0.365234375, "learning_rate": 0.0019658076818260986, "loss": 0.2405, "step": 9364 }, { "epoch": 0.016606692145864417, "grad_norm": 0.80078125, "learning_rate": 0.001965791430188811, "loss": 0.2341, "step": 9366 }, { "epoch": 0.01661023831117423, "grad_norm": 0.2333984375, "learning_rate": 0.0019657751747650548, "loss": 0.2252, "step": 9368 }, { "epoch": 0.01661378447648405, "grad_norm": 0.376953125, "learning_rate": 0.0019657589155549007, "loss": 0.2062, "step": 9370 }, { "epoch": 0.016617330641793864, "grad_norm": 0.6796875, "learning_rate": 0.0019657426525584204, "loss": 0.3197, "step": 9372 }, { "epoch": 0.01662087680710368, "grad_norm": 0.314453125, "learning_rate": 0.0019657263857756845, "loss": 0.2255, "step": 9374 }, { "epoch": 0.016624422972413493, "grad_norm": 0.3359375, "learning_rate": 0.001965710115206764, "loss": 0.2125, "step": 9376 }, { "epoch": 0.016627969137723308, "grad_norm": 0.52734375, "learning_rate": 0.001965693840851731, "loss": 0.2745, "step": 9378 }, { "epoch": 0.016631515303033122, "grad_norm": 0.337890625, "learning_rate": 0.0019656775627106553, "loss": 0.2305, "step": 9380 }, { "epoch": 0.01663506146834294, "grad_norm": 0.40625, "learning_rate": 0.001965661280783609, "loss": 0.2013, "step": 9382 }, { "epoch": 0.016638607633652755, "grad_norm": 0.50390625, "learning_rate": 0.001965644995070663, "loss": 0.2655, "step": 9384 }, { "epoch": 0.01664215379896257, "grad_norm": 0.396484375, "learning_rate": 0.0019656287055718883, "loss": 0.338, "step": 9386 }, { "epoch": 0.016645699964272384, "grad_norm": 0.46484375, "learning_rate": 0.0019656124122873573, "loss": 0.1839, "step": 9388 }, { "epoch": 0.0166492461295822, "grad_norm": 0.470703125, "learning_rate": 0.0019655961152171397, "loss": 0.2716, "step": 9390 }, { "epoch": 0.016652792294892013, "grad_norm": 0.462890625, "learning_rate": 0.001965579814361308, "loss": 0.2017, "step": 9392 }, { "epoch": 0.01665633846020183, "grad_norm": 1.9375, "learning_rate": 0.0019655635097199325, "loss": 0.3478, "step": 9394 }, { "epoch": 0.016659884625511646, "grad_norm": 0.25390625, "learning_rate": 0.0019655472012930853, "loss": 0.2572, "step": 9396 }, { "epoch": 0.01666343079082146, "grad_norm": 0.8828125, "learning_rate": 0.0019655308890808372, "loss": 0.2865, "step": 9398 }, { "epoch": 0.016666976956131275, "grad_norm": 0.41796875, "learning_rate": 0.00196551457308326, "loss": 0.2431, "step": 9400 }, { "epoch": 0.01667052312144109, "grad_norm": 0.298828125, "learning_rate": 0.0019654982533004245, "loss": 0.4089, "step": 9402 }, { "epoch": 0.016674069286750907, "grad_norm": 0.89453125, "learning_rate": 0.0019654819297324024, "loss": 0.2949, "step": 9404 }, { "epoch": 0.016677615452060722, "grad_norm": 1.125, "learning_rate": 0.0019654656023792654, "loss": 0.4328, "step": 9406 }, { "epoch": 0.016681161617370537, "grad_norm": 0.373046875, "learning_rate": 0.0019654492712410845, "loss": 0.2589, "step": 9408 }, { "epoch": 0.01668470778268035, "grad_norm": 0.451171875, "learning_rate": 0.001965432936317931, "loss": 0.3978, "step": 9410 }, { "epoch": 0.016688253947990166, "grad_norm": 0.70703125, "learning_rate": 0.0019654165976098767, "loss": 0.2142, "step": 9412 }, { "epoch": 0.01669180011329998, "grad_norm": 0.408203125, "learning_rate": 0.0019654002551169926, "loss": 0.208, "step": 9414 }, { "epoch": 0.016695346278609798, "grad_norm": 0.59375, "learning_rate": 0.001965383908839351, "loss": 0.2061, "step": 9416 }, { "epoch": 0.016698892443919613, "grad_norm": 0.8828125, "learning_rate": 0.0019653675587770225, "loss": 0.4743, "step": 9418 }, { "epoch": 0.016702438609229427, "grad_norm": 1.0625, "learning_rate": 0.001965351204930079, "loss": 0.2348, "step": 9420 }, { "epoch": 0.016705984774539242, "grad_norm": 0.7734375, "learning_rate": 0.001965334847298592, "loss": 0.2371, "step": 9422 }, { "epoch": 0.016709530939849056, "grad_norm": 2.40625, "learning_rate": 0.0019653184858826334, "loss": 0.2486, "step": 9424 }, { "epoch": 0.01671307710515887, "grad_norm": 0.3828125, "learning_rate": 0.0019653021206822736, "loss": 0.2321, "step": 9426 }, { "epoch": 0.01671662327046869, "grad_norm": 0.404296875, "learning_rate": 0.0019652857516975855, "loss": 0.1857, "step": 9428 }, { "epoch": 0.016720169435778504, "grad_norm": 0.5234375, "learning_rate": 0.00196526937892864, "loss": 0.2349, "step": 9430 }, { "epoch": 0.016723715601088318, "grad_norm": 0.3984375, "learning_rate": 0.0019652530023755088, "loss": 0.1952, "step": 9432 }, { "epoch": 0.016727261766398133, "grad_norm": 0.453125, "learning_rate": 0.0019652366220382638, "loss": 0.2145, "step": 9434 }, { "epoch": 0.016730807931707947, "grad_norm": 0.54296875, "learning_rate": 0.001965220237916976, "loss": 0.1916, "step": 9436 }, { "epoch": 0.016734354097017765, "grad_norm": 0.63671875, "learning_rate": 0.0019652038500117177, "loss": 0.2237, "step": 9438 }, { "epoch": 0.01673790026232758, "grad_norm": 0.6484375, "learning_rate": 0.00196518745832256, "loss": 0.2424, "step": 9440 }, { "epoch": 0.016741446427637394, "grad_norm": 0.71484375, "learning_rate": 0.0019651710628495757, "loss": 0.276, "step": 9442 }, { "epoch": 0.01674499259294721, "grad_norm": 0.62109375, "learning_rate": 0.0019651546635928354, "loss": 0.3992, "step": 9444 }, { "epoch": 0.016748538758257023, "grad_norm": 0.3046875, "learning_rate": 0.0019651382605524108, "loss": 0.1902, "step": 9446 }, { "epoch": 0.016752084923566838, "grad_norm": 0.84375, "learning_rate": 0.0019651218537283742, "loss": 0.232, "step": 9448 }, { "epoch": 0.016755631088876656, "grad_norm": 1.234375, "learning_rate": 0.0019651054431207974, "loss": 0.1949, "step": 9450 }, { "epoch": 0.01675917725418647, "grad_norm": 1.0078125, "learning_rate": 0.0019650890287297514, "loss": 0.3047, "step": 9452 }, { "epoch": 0.016762723419496285, "grad_norm": 0.48046875, "learning_rate": 0.0019650726105553086, "loss": 0.2615, "step": 9454 }, { "epoch": 0.0167662695848061, "grad_norm": 0.61328125, "learning_rate": 0.001965056188597541, "loss": 0.2362, "step": 9456 }, { "epoch": 0.016769815750115914, "grad_norm": 0.302734375, "learning_rate": 0.00196503976285652, "loss": 0.2349, "step": 9458 }, { "epoch": 0.01677336191542573, "grad_norm": 1.03125, "learning_rate": 0.0019650233333323172, "loss": 0.2014, "step": 9460 }, { "epoch": 0.016776908080735547, "grad_norm": 0.337890625, "learning_rate": 0.001965006900025005, "loss": 0.2017, "step": 9462 }, { "epoch": 0.01678045424604536, "grad_norm": 0.5, "learning_rate": 0.0019649904629346557, "loss": 0.2151, "step": 9464 }, { "epoch": 0.016784000411355176, "grad_norm": 0.40234375, "learning_rate": 0.0019649740220613393, "loss": 0.2648, "step": 9466 }, { "epoch": 0.01678754657666499, "grad_norm": 0.55859375, "learning_rate": 0.00196495757740513, "loss": 0.2362, "step": 9468 }, { "epoch": 0.016791092741974805, "grad_norm": 1.71875, "learning_rate": 0.001964941128966098, "loss": 0.3285, "step": 9470 }, { "epoch": 0.016794638907284623, "grad_norm": 1.1484375, "learning_rate": 0.0019649246767443167, "loss": 0.3969, "step": 9472 }, { "epoch": 0.016798185072594438, "grad_norm": 0.388671875, "learning_rate": 0.001964908220739857, "loss": 0.2377, "step": 9474 }, { "epoch": 0.016801731237904252, "grad_norm": 0.58984375, "learning_rate": 0.001964891760952791, "loss": 0.4679, "step": 9476 }, { "epoch": 0.016805277403214067, "grad_norm": 0.369140625, "learning_rate": 0.0019648752973831906, "loss": 0.1752, "step": 9478 }, { "epoch": 0.01680882356852388, "grad_norm": 0.5625, "learning_rate": 0.0019648588300311283, "loss": 0.2766, "step": 9480 }, { "epoch": 0.016812369733833696, "grad_norm": 0.2412109375, "learning_rate": 0.001964842358896676, "loss": 0.2533, "step": 9482 }, { "epoch": 0.016815915899143514, "grad_norm": 1.0546875, "learning_rate": 0.0019648258839799052, "loss": 0.2178, "step": 9484 }, { "epoch": 0.01681946206445333, "grad_norm": 0.5625, "learning_rate": 0.0019648094052808884, "loss": 0.2029, "step": 9486 }, { "epoch": 0.016823008229763143, "grad_norm": 1.140625, "learning_rate": 0.0019647929227996977, "loss": 0.2834, "step": 9488 }, { "epoch": 0.016826554395072957, "grad_norm": 0.384765625, "learning_rate": 0.0019647764365364052, "loss": 0.4014, "step": 9490 }, { "epoch": 0.016830100560382772, "grad_norm": 0.3125, "learning_rate": 0.001964759946491083, "loss": 0.2392, "step": 9492 }, { "epoch": 0.016833646725692587, "grad_norm": 0.263671875, "learning_rate": 0.001964743452663803, "loss": 0.2035, "step": 9494 }, { "epoch": 0.016837192891002405, "grad_norm": 1.21875, "learning_rate": 0.0019647269550546373, "loss": 0.2188, "step": 9496 }, { "epoch": 0.01684073905631222, "grad_norm": 0.5, "learning_rate": 0.001964710453663658, "loss": 0.2276, "step": 9498 }, { "epoch": 0.016844285221622034, "grad_norm": 0.51953125, "learning_rate": 0.0019646939484909377, "loss": 0.2155, "step": 9500 }, { "epoch": 0.01684783138693185, "grad_norm": 0.8828125, "learning_rate": 0.0019646774395365484, "loss": 0.2257, "step": 9502 }, { "epoch": 0.016851377552241663, "grad_norm": 0.3671875, "learning_rate": 0.001964660926800562, "loss": 0.3609, "step": 9504 }, { "epoch": 0.01685492371755148, "grad_norm": 0.87890625, "learning_rate": 0.0019646444102830512, "loss": 0.2938, "step": 9506 }, { "epoch": 0.016858469882861295, "grad_norm": 2.21875, "learning_rate": 0.001964627889984088, "loss": 0.4673, "step": 9508 }, { "epoch": 0.01686201604817111, "grad_norm": 0.63671875, "learning_rate": 0.0019646113659037446, "loss": 0.2517, "step": 9510 }, { "epoch": 0.016865562213480925, "grad_norm": 0.68359375, "learning_rate": 0.0019645948380420937, "loss": 0.268, "step": 9512 }, { "epoch": 0.01686910837879074, "grad_norm": 0.341796875, "learning_rate": 0.0019645783063992066, "loss": 0.2256, "step": 9514 }, { "epoch": 0.016872654544100554, "grad_norm": 1.8828125, "learning_rate": 0.001964561770975156, "loss": 0.3528, "step": 9516 }, { "epoch": 0.01687620070941037, "grad_norm": 2.6875, "learning_rate": 0.0019645452317700153, "loss": 0.3423, "step": 9518 }, { "epoch": 0.016879746874720186, "grad_norm": 0.2470703125, "learning_rate": 0.001964528688783855, "loss": 0.1844, "step": 9520 }, { "epoch": 0.01688329304003, "grad_norm": 0.58984375, "learning_rate": 0.001964512142016749, "loss": 0.1722, "step": 9522 }, { "epoch": 0.016886839205339815, "grad_norm": 0.63671875, "learning_rate": 0.0019644955914687686, "loss": 0.2366, "step": 9524 }, { "epoch": 0.01689038537064963, "grad_norm": 0.671875, "learning_rate": 0.001964479037139987, "loss": 0.1995, "step": 9526 }, { "epoch": 0.016893931535959444, "grad_norm": 0.3203125, "learning_rate": 0.0019644624790304765, "loss": 0.2399, "step": 9528 }, { "epoch": 0.016897477701269262, "grad_norm": 0.7265625, "learning_rate": 0.001964445917140309, "loss": 0.2286, "step": 9530 }, { "epoch": 0.016901023866579077, "grad_norm": 0.66796875, "learning_rate": 0.001964429351469557, "loss": 0.3129, "step": 9532 }, { "epoch": 0.01690457003188889, "grad_norm": 0.5390625, "learning_rate": 0.0019644127820182935, "loss": 0.2849, "step": 9534 }, { "epoch": 0.016908116197198706, "grad_norm": 1.7265625, "learning_rate": 0.00196439620878659, "loss": 0.2212, "step": 9536 }, { "epoch": 0.01691166236250852, "grad_norm": 0.76171875, "learning_rate": 0.00196437963177452, "loss": 0.319, "step": 9538 }, { "epoch": 0.01691520852781834, "grad_norm": 0.5625, "learning_rate": 0.001964363050982156, "loss": 0.2335, "step": 9540 }, { "epoch": 0.016918754693128153, "grad_norm": 0.2490234375, "learning_rate": 0.001964346466409569, "loss": 0.2173, "step": 9542 }, { "epoch": 0.016922300858437968, "grad_norm": 0.53515625, "learning_rate": 0.0019643298780568333, "loss": 0.2494, "step": 9544 }, { "epoch": 0.016925847023747782, "grad_norm": 1.0859375, "learning_rate": 0.001964313285924021, "loss": 0.2045, "step": 9546 }, { "epoch": 0.016929393189057597, "grad_norm": 0.41796875, "learning_rate": 0.001964296690011204, "loss": 0.1743, "step": 9548 }, { "epoch": 0.01693293935436741, "grad_norm": 2.140625, "learning_rate": 0.0019642800903184555, "loss": 0.4388, "step": 9550 }, { "epoch": 0.01693648551967723, "grad_norm": 0.330078125, "learning_rate": 0.001964263486845848, "loss": 0.2515, "step": 9552 }, { "epoch": 0.016940031684987044, "grad_norm": 1.5703125, "learning_rate": 0.0019642468795934534, "loss": 0.3165, "step": 9554 }, { "epoch": 0.01694357785029686, "grad_norm": 0.326171875, "learning_rate": 0.001964230268561346, "loss": 0.2904, "step": 9556 }, { "epoch": 0.016947124015606673, "grad_norm": 0.490234375, "learning_rate": 0.001964213653749597, "loss": 0.2365, "step": 9558 }, { "epoch": 0.016950670180916488, "grad_norm": 0.53125, "learning_rate": 0.001964197035158279, "loss": 0.2333, "step": 9560 }, { "epoch": 0.016954216346226302, "grad_norm": 0.38671875, "learning_rate": 0.001964180412787466, "loss": 0.2503, "step": 9562 }, { "epoch": 0.01695776251153612, "grad_norm": 0.337890625, "learning_rate": 0.0019641637866372294, "loss": 0.32, "step": 9564 }, { "epoch": 0.016961308676845935, "grad_norm": 0.51953125, "learning_rate": 0.0019641471567076424, "loss": 0.3051, "step": 9566 }, { "epoch": 0.01696485484215575, "grad_norm": 0.55859375, "learning_rate": 0.0019641305229987772, "loss": 0.2374, "step": 9568 }, { "epoch": 0.016968401007465564, "grad_norm": 0.42578125, "learning_rate": 0.0019641138855107073, "loss": 0.2053, "step": 9570 }, { "epoch": 0.01697194717277538, "grad_norm": 1.21875, "learning_rate": 0.0019640972442435058, "loss": 0.3455, "step": 9572 }, { "epoch": 0.016975493338085196, "grad_norm": 0.4765625, "learning_rate": 0.0019640805991972443, "loss": 0.237, "step": 9574 }, { "epoch": 0.01697903950339501, "grad_norm": 0.341796875, "learning_rate": 0.001964063950371996, "loss": 0.1935, "step": 9576 }, { "epoch": 0.016982585668704826, "grad_norm": 1.1640625, "learning_rate": 0.0019640472977678347, "loss": 0.2765, "step": 9578 }, { "epoch": 0.01698613183401464, "grad_norm": 0.40625, "learning_rate": 0.001964030641384832, "loss": 0.1984, "step": 9580 }, { "epoch": 0.016989677999324455, "grad_norm": 0.8671875, "learning_rate": 0.001964013981223061, "loss": 0.2682, "step": 9582 }, { "epoch": 0.01699322416463427, "grad_norm": 1.671875, "learning_rate": 0.001963997317282595, "loss": 0.4339, "step": 9584 }, { "epoch": 0.016996770329944087, "grad_norm": 2.640625, "learning_rate": 0.001963980649563506, "loss": 0.285, "step": 9586 }, { "epoch": 0.017000316495253902, "grad_norm": 0.71484375, "learning_rate": 0.0019639639780658683, "loss": 0.2807, "step": 9588 }, { "epoch": 0.017003862660563716, "grad_norm": 0.44921875, "learning_rate": 0.0019639473027897535, "loss": 0.1932, "step": 9590 }, { "epoch": 0.01700740882587353, "grad_norm": 0.62890625, "learning_rate": 0.0019639306237352354, "loss": 0.2416, "step": 9592 }, { "epoch": 0.017010954991183345, "grad_norm": 0.6171875, "learning_rate": 0.0019639139409023862, "loss": 0.2396, "step": 9594 }, { "epoch": 0.01701450115649316, "grad_norm": 0.875, "learning_rate": 0.0019638972542912795, "loss": 0.2727, "step": 9596 }, { "epoch": 0.017018047321802978, "grad_norm": 0.408203125, "learning_rate": 0.001963880563901988, "loss": 0.2144, "step": 9598 }, { "epoch": 0.017021593487112793, "grad_norm": 1.0625, "learning_rate": 0.001963863869734585, "loss": 0.2307, "step": 9600 }, { "epoch": 0.017025139652422607, "grad_norm": 0.423828125, "learning_rate": 0.0019638471717891423, "loss": 0.2664, "step": 9602 }, { "epoch": 0.01702868581773242, "grad_norm": 0.5546875, "learning_rate": 0.0019638304700657343, "loss": 0.1799, "step": 9604 }, { "epoch": 0.017032231983042236, "grad_norm": 1.0859375, "learning_rate": 0.001963813764564434, "loss": 0.2072, "step": 9606 }, { "epoch": 0.017035778148352054, "grad_norm": 1.1640625, "learning_rate": 0.001963797055285314, "loss": 0.2371, "step": 9608 }, { "epoch": 0.01703932431366187, "grad_norm": 1.25, "learning_rate": 0.001963780342228447, "loss": 0.2137, "step": 9610 }, { "epoch": 0.017042870478971683, "grad_norm": 0.63671875, "learning_rate": 0.0019637636253939067, "loss": 0.1847, "step": 9612 }, { "epoch": 0.017046416644281498, "grad_norm": 1.7578125, "learning_rate": 0.001963746904781766, "loss": 0.2864, "step": 9614 }, { "epoch": 0.017049962809591312, "grad_norm": 6.8125, "learning_rate": 0.0019637301803920983, "loss": 0.3559, "step": 9616 }, { "epoch": 0.017053508974901127, "grad_norm": 0.8046875, "learning_rate": 0.0019637134522249764, "loss": 0.2836, "step": 9618 }, { "epoch": 0.017057055140210945, "grad_norm": 0.35546875, "learning_rate": 0.0019636967202804733, "loss": 0.4623, "step": 9620 }, { "epoch": 0.01706060130552076, "grad_norm": 0.369140625, "learning_rate": 0.0019636799845586628, "loss": 0.2132, "step": 9622 }, { "epoch": 0.017064147470830574, "grad_norm": 1.03125, "learning_rate": 0.0019636632450596176, "loss": 0.2257, "step": 9624 }, { "epoch": 0.01706769363614039, "grad_norm": 0.40234375, "learning_rate": 0.0019636465017834107, "loss": 0.2445, "step": 9626 }, { "epoch": 0.017071239801450203, "grad_norm": 0.83203125, "learning_rate": 0.0019636297547301162, "loss": 0.2282, "step": 9628 }, { "epoch": 0.017074785966760018, "grad_norm": 0.498046875, "learning_rate": 0.0019636130038998066, "loss": 0.2248, "step": 9630 }, { "epoch": 0.017078332132069836, "grad_norm": 0.3984375, "learning_rate": 0.0019635962492925555, "loss": 0.2048, "step": 9632 }, { "epoch": 0.01708187829737965, "grad_norm": 0.427734375, "learning_rate": 0.001963579490908436, "loss": 0.2065, "step": 9634 }, { "epoch": 0.017085424462689465, "grad_norm": 0.421875, "learning_rate": 0.001963562728747521, "loss": 0.2628, "step": 9636 }, { "epoch": 0.01708897062799928, "grad_norm": 0.703125, "learning_rate": 0.001963545962809885, "loss": 0.1987, "step": 9638 }, { "epoch": 0.017092516793309094, "grad_norm": 0.54296875, "learning_rate": 0.0019635291930955997, "loss": 0.2323, "step": 9640 }, { "epoch": 0.017096062958618912, "grad_norm": 0.5703125, "learning_rate": 0.00196351241960474, "loss": 0.2546, "step": 9642 }, { "epoch": 0.017099609123928727, "grad_norm": 0.341796875, "learning_rate": 0.001963495642337378, "loss": 0.212, "step": 9644 }, { "epoch": 0.01710315528923854, "grad_norm": 0.6484375, "learning_rate": 0.0019634788612935884, "loss": 0.2484, "step": 9646 }, { "epoch": 0.017106701454548356, "grad_norm": 0.6484375, "learning_rate": 0.001963462076473443, "loss": 0.2394, "step": 9648 }, { "epoch": 0.01711024761985817, "grad_norm": 0.443359375, "learning_rate": 0.001963445287877017, "loss": 0.2231, "step": 9650 }, { "epoch": 0.017113793785167985, "grad_norm": 0.56640625, "learning_rate": 0.0019634284955043816, "loss": 0.2258, "step": 9652 }, { "epoch": 0.017117339950477803, "grad_norm": 0.8671875, "learning_rate": 0.0019634116993556125, "loss": 0.2194, "step": 9654 }, { "epoch": 0.017120886115787617, "grad_norm": 0.45703125, "learning_rate": 0.001963394899430782, "loss": 0.2386, "step": 9656 }, { "epoch": 0.017124432281097432, "grad_norm": 0.9453125, "learning_rate": 0.0019633780957299635, "loss": 0.2901, "step": 9658 }, { "epoch": 0.017127978446407247, "grad_norm": 0.416015625, "learning_rate": 0.0019633612882532306, "loss": 0.2492, "step": 9660 }, { "epoch": 0.01713152461171706, "grad_norm": 0.4609375, "learning_rate": 0.001963344477000657, "loss": 0.2727, "step": 9662 }, { "epoch": 0.017135070777026876, "grad_norm": 0.255859375, "learning_rate": 0.0019633276619723163, "loss": 0.2561, "step": 9664 }, { "epoch": 0.017138616942336694, "grad_norm": 0.875, "learning_rate": 0.001963310843168282, "loss": 0.2272, "step": 9666 }, { "epoch": 0.017142163107646508, "grad_norm": 0.330078125, "learning_rate": 0.0019632940205886276, "loss": 0.2567, "step": 9668 }, { "epoch": 0.017145709272956323, "grad_norm": 0.890625, "learning_rate": 0.001963277194233426, "loss": 0.3432, "step": 9670 }, { "epoch": 0.017149255438266137, "grad_norm": 2.359375, "learning_rate": 0.001963260364102752, "loss": 0.3546, "step": 9672 }, { "epoch": 0.017152801603575952, "grad_norm": 0.46484375, "learning_rate": 0.0019632435301966786, "loss": 0.2367, "step": 9674 }, { "epoch": 0.01715634776888577, "grad_norm": 0.7109375, "learning_rate": 0.0019632266925152793, "loss": 0.2208, "step": 9676 }, { "epoch": 0.017159893934195584, "grad_norm": 0.2490234375, "learning_rate": 0.0019632098510586277, "loss": 0.1687, "step": 9678 }, { "epoch": 0.0171634400995054, "grad_norm": 0.279296875, "learning_rate": 0.0019631930058267976, "loss": 0.2545, "step": 9680 }, { "epoch": 0.017166986264815214, "grad_norm": 0.427734375, "learning_rate": 0.001963176156819863, "loss": 0.2024, "step": 9682 }, { "epoch": 0.017170532430125028, "grad_norm": 0.349609375, "learning_rate": 0.001963159304037897, "loss": 0.2608, "step": 9684 }, { "epoch": 0.017174078595434843, "grad_norm": 0.52734375, "learning_rate": 0.0019631424474809735, "loss": 0.2846, "step": 9686 }, { "epoch": 0.01717762476074466, "grad_norm": 5.6875, "learning_rate": 0.0019631255871491666, "loss": 0.385, "step": 9688 }, { "epoch": 0.017181170926054475, "grad_norm": 0.7890625, "learning_rate": 0.0019631087230425493, "loss": 0.1695, "step": 9690 }, { "epoch": 0.01718471709136429, "grad_norm": 0.4609375, "learning_rate": 0.0019630918551611963, "loss": 0.2207, "step": 9692 }, { "epoch": 0.017188263256674104, "grad_norm": 1.0078125, "learning_rate": 0.001963074983505181, "loss": 0.2661, "step": 9694 }, { "epoch": 0.01719180942198392, "grad_norm": 0.43359375, "learning_rate": 0.001963058108074577, "loss": 0.3198, "step": 9696 }, { "epoch": 0.017195355587293733, "grad_norm": 0.240234375, "learning_rate": 0.0019630412288694577, "loss": 0.19, "step": 9698 }, { "epoch": 0.01719890175260355, "grad_norm": 0.287109375, "learning_rate": 0.0019630243458898977, "loss": 0.1994, "step": 9700 }, { "epoch": 0.017202447917913366, "grad_norm": 0.419921875, "learning_rate": 0.0019630074591359702, "loss": 0.2371, "step": 9702 }, { "epoch": 0.01720599408322318, "grad_norm": 0.65625, "learning_rate": 0.00196299056860775, "loss": 0.2823, "step": 9704 }, { "epoch": 0.017209540248532995, "grad_norm": 0.71484375, "learning_rate": 0.0019629736743053097, "loss": 0.2987, "step": 9706 }, { "epoch": 0.01721308641384281, "grad_norm": 0.41796875, "learning_rate": 0.001962956776228724, "loss": 0.2761, "step": 9708 }, { "epoch": 0.017216632579152628, "grad_norm": 0.404296875, "learning_rate": 0.001962939874378067, "loss": 0.2157, "step": 9710 }, { "epoch": 0.017220178744462442, "grad_norm": 0.6875, "learning_rate": 0.0019629229687534114, "loss": 0.2217, "step": 9712 }, { "epoch": 0.017223724909772257, "grad_norm": 0.298828125, "learning_rate": 0.0019629060593548326, "loss": 0.276, "step": 9714 }, { "epoch": 0.01722727107508207, "grad_norm": 0.314453125, "learning_rate": 0.001962889146182404, "loss": 0.2515, "step": 9716 }, { "epoch": 0.017230817240391886, "grad_norm": 0.404296875, "learning_rate": 0.0019628722292361995, "loss": 0.246, "step": 9718 }, { "epoch": 0.0172343634057017, "grad_norm": 0.431640625, "learning_rate": 0.0019628553085162927, "loss": 0.2115, "step": 9720 }, { "epoch": 0.01723790957101152, "grad_norm": 0.515625, "learning_rate": 0.001962838384022758, "loss": 0.1892, "step": 9722 }, { "epoch": 0.017241455736321333, "grad_norm": 0.40625, "learning_rate": 0.0019628214557556698, "loss": 0.2051, "step": 9724 }, { "epoch": 0.017245001901631148, "grad_norm": 0.9140625, "learning_rate": 0.0019628045237151015, "loss": 0.1695, "step": 9726 }, { "epoch": 0.017248548066940962, "grad_norm": 0.9765625, "learning_rate": 0.0019627875879011276, "loss": 0.3314, "step": 9728 }, { "epoch": 0.017252094232250777, "grad_norm": 6.09375, "learning_rate": 0.001962770648313822, "loss": 0.4471, "step": 9730 }, { "epoch": 0.01725564039756059, "grad_norm": 0.435546875, "learning_rate": 0.0019627537049532583, "loss": 0.2151, "step": 9732 }, { "epoch": 0.01725918656287041, "grad_norm": 0.61328125, "learning_rate": 0.0019627367578195112, "loss": 0.2691, "step": 9734 }, { "epoch": 0.017262732728180224, "grad_norm": 0.28515625, "learning_rate": 0.0019627198069126547, "loss": 0.3818, "step": 9736 }, { "epoch": 0.01726627889349004, "grad_norm": 0.44921875, "learning_rate": 0.001962702852232763, "loss": 0.3112, "step": 9738 }, { "epoch": 0.017269825058799853, "grad_norm": 0.419921875, "learning_rate": 0.0019626858937799104, "loss": 0.2058, "step": 9740 }, { "epoch": 0.017273371224109667, "grad_norm": 0.353515625, "learning_rate": 0.0019626689315541705, "loss": 0.2172, "step": 9742 }, { "epoch": 0.017276917389419486, "grad_norm": 0.88671875, "learning_rate": 0.0019626519655556174, "loss": 0.3053, "step": 9744 }, { "epoch": 0.0172804635547293, "grad_norm": 0.2890625, "learning_rate": 0.001962634995784326, "loss": 0.2005, "step": 9746 }, { "epoch": 0.017284009720039115, "grad_norm": 0.330078125, "learning_rate": 0.0019626180222403703, "loss": 0.2145, "step": 9748 }, { "epoch": 0.01728755588534893, "grad_norm": 0.703125, "learning_rate": 0.0019626010449238247, "loss": 0.3239, "step": 9750 }, { "epoch": 0.017291102050658744, "grad_norm": 0.62890625, "learning_rate": 0.001962584063834763, "loss": 0.2424, "step": 9752 }, { "epoch": 0.01729464821596856, "grad_norm": 0.3515625, "learning_rate": 0.001962567078973259, "loss": 0.2768, "step": 9754 }, { "epoch": 0.017298194381278376, "grad_norm": 2.96875, "learning_rate": 0.0019625500903393883, "loss": 0.2033, "step": 9756 }, { "epoch": 0.01730174054658819, "grad_norm": 0.6953125, "learning_rate": 0.0019625330979332247, "loss": 0.2733, "step": 9758 }, { "epoch": 0.017305286711898005, "grad_norm": 0.64453125, "learning_rate": 0.001962516101754842, "loss": 0.346, "step": 9760 }, { "epoch": 0.01730883287720782, "grad_norm": 0.353515625, "learning_rate": 0.001962499101804315, "loss": 0.2294, "step": 9762 }, { "epoch": 0.017312379042517635, "grad_norm": 1.0859375, "learning_rate": 0.001962482098081718, "loss": 0.2946, "step": 9764 }, { "epoch": 0.01731592520782745, "grad_norm": 0.322265625, "learning_rate": 0.0019624650905871246, "loss": 0.2107, "step": 9766 }, { "epoch": 0.017319471373137267, "grad_norm": 1.1171875, "learning_rate": 0.001962448079320611, "loss": 0.2931, "step": 9768 }, { "epoch": 0.01732301753844708, "grad_norm": 0.435546875, "learning_rate": 0.0019624310642822494, "loss": 0.3014, "step": 9770 }, { "epoch": 0.017326563703756896, "grad_norm": 4.71875, "learning_rate": 0.001962414045472116, "loss": 0.3846, "step": 9772 }, { "epoch": 0.01733010986906671, "grad_norm": 0.9765625, "learning_rate": 0.001962397022890284, "loss": 0.2053, "step": 9774 }, { "epoch": 0.017333656034376525, "grad_norm": 0.478515625, "learning_rate": 0.0019623799965368294, "loss": 0.2185, "step": 9776 }, { "epoch": 0.017337202199686343, "grad_norm": 0.92578125, "learning_rate": 0.0019623629664118247, "loss": 0.3544, "step": 9778 }, { "epoch": 0.017340748364996158, "grad_norm": 0.42578125, "learning_rate": 0.0019623459325153456, "loss": 0.2414, "step": 9780 }, { "epoch": 0.017344294530305972, "grad_norm": 5.65625, "learning_rate": 0.001962328894847466, "loss": 0.3282, "step": 9782 }, { "epoch": 0.017347840695615787, "grad_norm": 0.93359375, "learning_rate": 0.0019623118534082607, "loss": 0.3129, "step": 9784 }, { "epoch": 0.0173513868609256, "grad_norm": 0.703125, "learning_rate": 0.0019622948081978045, "loss": 0.2256, "step": 9786 }, { "epoch": 0.017354933026235416, "grad_norm": 0.380859375, "learning_rate": 0.001962277759216172, "loss": 0.2163, "step": 9788 }, { "epoch": 0.017358479191545234, "grad_norm": 0.353515625, "learning_rate": 0.001962260706463437, "loss": 0.2617, "step": 9790 }, { "epoch": 0.01736202535685505, "grad_norm": 0.423828125, "learning_rate": 0.0019622436499396744, "loss": 0.2128, "step": 9792 }, { "epoch": 0.017365571522164863, "grad_norm": 0.3125, "learning_rate": 0.0019622265896449592, "loss": 0.2186, "step": 9794 }, { "epoch": 0.017369117687474678, "grad_norm": 0.8984375, "learning_rate": 0.001962209525579366, "loss": 0.28, "step": 9796 }, { "epoch": 0.017372663852784492, "grad_norm": 0.78125, "learning_rate": 0.0019621924577429687, "loss": 0.2019, "step": 9798 }, { "epoch": 0.017376210018094307, "grad_norm": 0.67578125, "learning_rate": 0.0019621753861358425, "loss": 0.2743, "step": 9800 }, { "epoch": 0.017379756183404125, "grad_norm": 0.61328125, "learning_rate": 0.0019621583107580624, "loss": 0.2726, "step": 9802 }, { "epoch": 0.01738330234871394, "grad_norm": 2.09375, "learning_rate": 0.001962141231609702, "loss": 0.3481, "step": 9804 }, { "epoch": 0.017386848514023754, "grad_norm": 0.2890625, "learning_rate": 0.001962124148690837, "loss": 0.1917, "step": 9806 }, { "epoch": 0.01739039467933357, "grad_norm": 0.63671875, "learning_rate": 0.001962107062001542, "loss": 0.1999, "step": 9808 }, { "epoch": 0.017393940844643383, "grad_norm": 0.51171875, "learning_rate": 0.0019620899715418905, "loss": 0.2412, "step": 9810 }, { "epoch": 0.0173974870099532, "grad_norm": 0.91796875, "learning_rate": 0.0019620728773119592, "loss": 0.2498, "step": 9812 }, { "epoch": 0.017401033175263016, "grad_norm": 0.41796875, "learning_rate": 0.0019620557793118215, "loss": 0.247, "step": 9814 }, { "epoch": 0.01740457934057283, "grad_norm": 0.48046875, "learning_rate": 0.001962038677541553, "loss": 0.2054, "step": 9816 }, { "epoch": 0.017408125505882645, "grad_norm": 0.93359375, "learning_rate": 0.0019620215720012276, "loss": 0.329, "step": 9818 }, { "epoch": 0.01741167167119246, "grad_norm": 0.55078125, "learning_rate": 0.0019620044626909206, "loss": 0.2947, "step": 9820 }, { "epoch": 0.017415217836502274, "grad_norm": 0.52734375, "learning_rate": 0.001961987349610707, "loss": 0.1959, "step": 9822 }, { "epoch": 0.017418764001812092, "grad_norm": 10.0, "learning_rate": 0.0019619702327606614, "loss": 0.335, "step": 9824 }, { "epoch": 0.017422310167121906, "grad_norm": 0.318359375, "learning_rate": 0.0019619531121408585, "loss": 0.2384, "step": 9826 }, { "epoch": 0.01742585633243172, "grad_norm": 0.84375, "learning_rate": 0.0019619359877513735, "loss": 0.3579, "step": 9828 }, { "epoch": 0.017429402497741536, "grad_norm": 0.314453125, "learning_rate": 0.001961918859592281, "loss": 0.2646, "step": 9830 }, { "epoch": 0.01743294866305135, "grad_norm": 0.388671875, "learning_rate": 0.001961901727663656, "loss": 0.216, "step": 9832 }, { "epoch": 0.017436494828361165, "grad_norm": 0.60546875, "learning_rate": 0.001961884591965574, "loss": 0.2314, "step": 9834 }, { "epoch": 0.017440040993670983, "grad_norm": 0.251953125, "learning_rate": 0.0019618674524981092, "loss": 0.253, "step": 9836 }, { "epoch": 0.017443587158980797, "grad_norm": 0.53125, "learning_rate": 0.0019618503092613364, "loss": 0.3197, "step": 9838 }, { "epoch": 0.017447133324290612, "grad_norm": 2.8125, "learning_rate": 0.001961833162255331, "loss": 0.3736, "step": 9840 }, { "epoch": 0.017450679489600426, "grad_norm": 0.4609375, "learning_rate": 0.0019618160114801687, "loss": 0.2202, "step": 9842 }, { "epoch": 0.01745422565491024, "grad_norm": 0.455078125, "learning_rate": 0.001961798856935923, "loss": 0.2819, "step": 9844 }, { "epoch": 0.01745777182022006, "grad_norm": 0.5625, "learning_rate": 0.0019617816986226702, "loss": 0.2464, "step": 9846 }, { "epoch": 0.017461317985529873, "grad_norm": 0.56640625, "learning_rate": 0.0019617645365404847, "loss": 0.2757, "step": 9848 }, { "epoch": 0.017464864150839688, "grad_norm": 0.83203125, "learning_rate": 0.0019617473706894416, "loss": 0.2038, "step": 9850 }, { "epoch": 0.017468410316149503, "grad_norm": 1.15625, "learning_rate": 0.0019617302010696163, "loss": 0.4924, "step": 9852 }, { "epoch": 0.017471956481459317, "grad_norm": 0.384765625, "learning_rate": 0.0019617130276810835, "loss": 0.242, "step": 9854 }, { "epoch": 0.01747550264676913, "grad_norm": 1.1953125, "learning_rate": 0.001961695850523918, "loss": 0.3336, "step": 9856 }, { "epoch": 0.01747904881207895, "grad_norm": 0.376953125, "learning_rate": 0.001961678669598196, "loss": 0.1924, "step": 9858 }, { "epoch": 0.017482594977388764, "grad_norm": 0.310546875, "learning_rate": 0.001961661484903992, "loss": 0.2198, "step": 9860 }, { "epoch": 0.01748614114269858, "grad_norm": 0.400390625, "learning_rate": 0.001961644296441381, "loss": 0.3027, "step": 9862 }, { "epoch": 0.017489687308008393, "grad_norm": 0.359375, "learning_rate": 0.0019616271042104385, "loss": 0.2166, "step": 9864 }, { "epoch": 0.017493233473318208, "grad_norm": 0.515625, "learning_rate": 0.0019616099082112393, "loss": 0.2122, "step": 9866 }, { "epoch": 0.017496779638628022, "grad_norm": 0.4375, "learning_rate": 0.0019615927084438585, "loss": 0.3945, "step": 9868 }, { "epoch": 0.01750032580393784, "grad_norm": 16.0, "learning_rate": 0.001961575504908372, "loss": 0.3347, "step": 9870 }, { "epoch": 0.017503871969247655, "grad_norm": 1.59375, "learning_rate": 0.0019615582976048552, "loss": 0.3852, "step": 9872 }, { "epoch": 0.01750741813455747, "grad_norm": 1.015625, "learning_rate": 0.0019615410865333823, "loss": 0.35, "step": 9874 }, { "epoch": 0.017510964299867284, "grad_norm": 0.416015625, "learning_rate": 0.0019615238716940296, "loss": 0.2599, "step": 9876 }, { "epoch": 0.0175145104651771, "grad_norm": 1.328125, "learning_rate": 0.0019615066530868715, "loss": 0.3203, "step": 9878 }, { "epoch": 0.017518056630486917, "grad_norm": 0.5703125, "learning_rate": 0.0019614894307119837, "loss": 0.3224, "step": 9880 }, { "epoch": 0.01752160279579673, "grad_norm": 0.83203125, "learning_rate": 0.0019614722045694414, "loss": 0.2866, "step": 9882 }, { "epoch": 0.017525148961106546, "grad_norm": 0.4609375, "learning_rate": 0.0019614549746593208, "loss": 0.2413, "step": 9884 }, { "epoch": 0.01752869512641636, "grad_norm": 0.46484375, "learning_rate": 0.001961437740981696, "loss": 0.2427, "step": 9886 }, { "epoch": 0.017532241291726175, "grad_norm": 0.40625, "learning_rate": 0.001961420503536643, "loss": 0.2393, "step": 9888 }, { "epoch": 0.01753578745703599, "grad_norm": 0.28125, "learning_rate": 0.001961403262324237, "loss": 0.221, "step": 9890 }, { "epoch": 0.017539333622345808, "grad_norm": 0.357421875, "learning_rate": 0.0019613860173445536, "loss": 0.2751, "step": 9892 }, { "epoch": 0.017542879787655622, "grad_norm": 0.302734375, "learning_rate": 0.001961368768597668, "loss": 0.2411, "step": 9894 }, { "epoch": 0.017546425952965437, "grad_norm": 1.9140625, "learning_rate": 0.0019613515160836563, "loss": 0.4987, "step": 9896 }, { "epoch": 0.01754997211827525, "grad_norm": 0.40625, "learning_rate": 0.0019613342598025925, "loss": 0.1701, "step": 9898 }, { "epoch": 0.017553518283585066, "grad_norm": 0.66796875, "learning_rate": 0.0019613169997545533, "loss": 0.2124, "step": 9900 }, { "epoch": 0.01755706444889488, "grad_norm": 0.9140625, "learning_rate": 0.001961299735939614, "loss": 0.2289, "step": 9902 }, { "epoch": 0.0175606106142047, "grad_norm": 0.439453125, "learning_rate": 0.00196128246835785, "loss": 0.2478, "step": 9904 }, { "epoch": 0.017564156779514513, "grad_norm": 0.48828125, "learning_rate": 0.0019612651970093366, "loss": 0.2447, "step": 9906 }, { "epoch": 0.017567702944824327, "grad_norm": 0.314453125, "learning_rate": 0.0019612479218941497, "loss": 0.2238, "step": 9908 }, { "epoch": 0.017571249110134142, "grad_norm": 0.4765625, "learning_rate": 0.0019612306430123647, "loss": 0.2221, "step": 9910 }, { "epoch": 0.017574795275443957, "grad_norm": 0.482421875, "learning_rate": 0.0019612133603640566, "loss": 0.3662, "step": 9912 }, { "epoch": 0.017578341440753775, "grad_norm": 0.37109375, "learning_rate": 0.001961196073949302, "loss": 0.2078, "step": 9914 }, { "epoch": 0.01758188760606359, "grad_norm": 1.4140625, "learning_rate": 0.001961178783768176, "loss": 0.3762, "step": 9916 }, { "epoch": 0.017585433771373404, "grad_norm": 0.625, "learning_rate": 0.001961161489820754, "loss": 0.3405, "step": 9918 }, { "epoch": 0.017588979936683218, "grad_norm": 0.455078125, "learning_rate": 0.001961144192107112, "loss": 0.3045, "step": 9920 }, { "epoch": 0.017592526101993033, "grad_norm": 1.5859375, "learning_rate": 0.001961126890627326, "loss": 0.2774, "step": 9922 }, { "epoch": 0.017596072267302847, "grad_norm": 0.37109375, "learning_rate": 0.0019611095853814702, "loss": 0.2138, "step": 9924 }, { "epoch": 0.017599618432612665, "grad_norm": 0.46484375, "learning_rate": 0.0019610922763696223, "loss": 0.2308, "step": 9926 }, { "epoch": 0.01760316459792248, "grad_norm": 0.87890625, "learning_rate": 0.001961074963591856, "loss": 0.2296, "step": 9928 }, { "epoch": 0.017606710763232294, "grad_norm": 1.671875, "learning_rate": 0.0019610576470482487, "loss": 0.4829, "step": 9930 }, { "epoch": 0.01761025692854211, "grad_norm": 0.5546875, "learning_rate": 0.001961040326738875, "loss": 0.3005, "step": 9932 }, { "epoch": 0.017613803093851924, "grad_norm": 0.2578125, "learning_rate": 0.001961023002663811, "loss": 0.1953, "step": 9934 }, { "epoch": 0.017617349259161738, "grad_norm": 0.95703125, "learning_rate": 0.0019610056748231327, "loss": 0.2426, "step": 9936 }, { "epoch": 0.017620895424471556, "grad_norm": 0.51171875, "learning_rate": 0.001960988343216916, "loss": 0.1865, "step": 9938 }, { "epoch": 0.01762444158978137, "grad_norm": 2.046875, "learning_rate": 0.0019609710078452363, "loss": 0.4257, "step": 9940 }, { "epoch": 0.017627987755091185, "grad_norm": 0.74609375, "learning_rate": 0.0019609536687081692, "loss": 0.223, "step": 9942 }, { "epoch": 0.017631533920401, "grad_norm": 0.20703125, "learning_rate": 0.001960936325805791, "loss": 0.1712, "step": 9944 }, { "epoch": 0.017635080085710814, "grad_norm": 0.56640625, "learning_rate": 0.0019609189791381775, "loss": 0.2821, "step": 9946 }, { "epoch": 0.017638626251020632, "grad_norm": 0.443359375, "learning_rate": 0.0019609016287054043, "loss": 0.2669, "step": 9948 }, { "epoch": 0.017642172416330447, "grad_norm": 2.484375, "learning_rate": 0.0019608842745075477, "loss": 0.2853, "step": 9950 }, { "epoch": 0.01764571858164026, "grad_norm": 1.7109375, "learning_rate": 0.0019608669165446834, "loss": 0.2976, "step": 9952 }, { "epoch": 0.017649264746950076, "grad_norm": 0.73046875, "learning_rate": 0.001960849554816887, "loss": 0.271, "step": 9954 }, { "epoch": 0.01765281091225989, "grad_norm": 0.455078125, "learning_rate": 0.0019608321893242343, "loss": 0.2153, "step": 9956 }, { "epoch": 0.017656357077569705, "grad_norm": 0.458984375, "learning_rate": 0.001960814820066802, "loss": 0.2775, "step": 9958 }, { "epoch": 0.017659903242879523, "grad_norm": 0.46484375, "learning_rate": 0.0019607974470446663, "loss": 0.2958, "step": 9960 }, { "epoch": 0.017663449408189338, "grad_norm": 0.267578125, "learning_rate": 0.001960780070257902, "loss": 0.2829, "step": 9962 }, { "epoch": 0.017666995573499152, "grad_norm": 0.41015625, "learning_rate": 0.0019607626897065857, "loss": 0.2188, "step": 9964 }, { "epoch": 0.017670541738808967, "grad_norm": 0.71484375, "learning_rate": 0.0019607453053907932, "loss": 0.3304, "step": 9966 }, { "epoch": 0.01767408790411878, "grad_norm": 0.337890625, "learning_rate": 0.001960727917310601, "loss": 0.2613, "step": 9968 }, { "epoch": 0.017677634069428596, "grad_norm": 0.28515625, "learning_rate": 0.0019607105254660848, "loss": 0.3164, "step": 9970 }, { "epoch": 0.017681180234738414, "grad_norm": 0.72265625, "learning_rate": 0.0019606931298573205, "loss": 0.2424, "step": 9972 }, { "epoch": 0.01768472640004823, "grad_norm": 0.5390625, "learning_rate": 0.0019606757304843846, "loss": 0.2003, "step": 9974 }, { "epoch": 0.017688272565358043, "grad_norm": 0.314453125, "learning_rate": 0.0019606583273473533, "loss": 0.1778, "step": 9976 }, { "epoch": 0.017691818730667858, "grad_norm": 0.458984375, "learning_rate": 0.001960640920446302, "loss": 0.2982, "step": 9978 }, { "epoch": 0.017695364895977672, "grad_norm": 0.36328125, "learning_rate": 0.0019606235097813073, "loss": 0.2776, "step": 9980 }, { "epoch": 0.01769891106128749, "grad_norm": 0.416015625, "learning_rate": 0.0019606060953524453, "loss": 0.208, "step": 9982 }, { "epoch": 0.017702457226597305, "grad_norm": 0.328125, "learning_rate": 0.0019605886771597923, "loss": 0.2513, "step": 9984 }, { "epoch": 0.01770600339190712, "grad_norm": 0.38671875, "learning_rate": 0.001960571255203424, "loss": 0.2291, "step": 9986 }, { "epoch": 0.017709549557216934, "grad_norm": 0.6171875, "learning_rate": 0.001960553829483417, "loss": 0.2553, "step": 9988 }, { "epoch": 0.01771309572252675, "grad_norm": 0.40234375, "learning_rate": 0.0019605363999998476, "loss": 0.1867, "step": 9990 }, { "epoch": 0.017716641887836563, "grad_norm": 0.244140625, "learning_rate": 0.001960518966752792, "loss": 0.2054, "step": 9992 }, { "epoch": 0.01772018805314638, "grad_norm": 0.46875, "learning_rate": 0.001960501529742326, "loss": 0.3144, "step": 9994 }, { "epoch": 0.017723734218456196, "grad_norm": 0.62109375, "learning_rate": 0.001960484088968526, "loss": 0.2266, "step": 9996 }, { "epoch": 0.01772728038376601, "grad_norm": 0.6953125, "learning_rate": 0.001960466644431469, "loss": 0.2959, "step": 9998 }, { "epoch": 0.017730826549075825, "grad_norm": 3.296875, "learning_rate": 0.00196044919613123, "loss": 0.3539, "step": 10000 }, { "epoch": 0.01773437271438564, "grad_norm": 0.3515625, "learning_rate": 0.001960431744067886, "loss": 0.2411, "step": 10002 }, { "epoch": 0.017737918879695454, "grad_norm": 0.64453125, "learning_rate": 0.0019604142882415137, "loss": 0.2444, "step": 10004 }, { "epoch": 0.017741465045005272, "grad_norm": 0.40625, "learning_rate": 0.001960396828652189, "loss": 0.2599, "step": 10006 }, { "epoch": 0.017745011210315086, "grad_norm": 0.5625, "learning_rate": 0.0019603793652999886, "loss": 0.2996, "step": 10008 }, { "epoch": 0.0177485573756249, "grad_norm": 0.458984375, "learning_rate": 0.0019603618981849885, "loss": 0.413, "step": 10010 }, { "epoch": 0.017752103540934715, "grad_norm": 1.1953125, "learning_rate": 0.0019603444273072652, "loss": 0.3284, "step": 10012 }, { "epoch": 0.01775564970624453, "grad_norm": 0.369140625, "learning_rate": 0.001960326952666895, "loss": 0.2157, "step": 10014 }, { "epoch": 0.017759195871554348, "grad_norm": 1.6171875, "learning_rate": 0.001960309474263955, "loss": 0.2594, "step": 10016 }, { "epoch": 0.017762742036864163, "grad_norm": 0.57421875, "learning_rate": 0.0019602919920985204, "loss": 0.2838, "step": 10018 }, { "epoch": 0.017766288202173977, "grad_norm": 0.7578125, "learning_rate": 0.0019602745061706684, "loss": 0.2199, "step": 10020 }, { "epoch": 0.01776983436748379, "grad_norm": 0.48828125, "learning_rate": 0.001960257016480476, "loss": 0.2057, "step": 10022 }, { "epoch": 0.017773380532793606, "grad_norm": 0.40625, "learning_rate": 0.0019602395230280184, "loss": 0.1974, "step": 10024 }, { "epoch": 0.01777692669810342, "grad_norm": 0.69921875, "learning_rate": 0.0019602220258133733, "loss": 0.2449, "step": 10026 }, { "epoch": 0.01778047286341324, "grad_norm": 0.28125, "learning_rate": 0.001960204524836616, "loss": 0.2083, "step": 10028 }, { "epoch": 0.017784019028723053, "grad_norm": 0.80859375, "learning_rate": 0.001960187020097825, "loss": 0.3281, "step": 10030 }, { "epoch": 0.017787565194032868, "grad_norm": 0.31640625, "learning_rate": 0.0019601695115970745, "loss": 0.2249, "step": 10032 }, { "epoch": 0.017791111359342682, "grad_norm": 0.43359375, "learning_rate": 0.0019601519993344427, "loss": 0.2312, "step": 10034 }, { "epoch": 0.017794657524652497, "grad_norm": 0.318359375, "learning_rate": 0.001960134483310006, "loss": 0.3007, "step": 10036 }, { "epoch": 0.01779820368996231, "grad_norm": 0.392578125, "learning_rate": 0.00196011696352384, "loss": 0.2588, "step": 10038 }, { "epoch": 0.01780174985527213, "grad_norm": 0.36328125, "learning_rate": 0.0019600994399760225, "loss": 0.2709, "step": 10040 }, { "epoch": 0.017805296020581944, "grad_norm": 0.474609375, "learning_rate": 0.0019600819126666296, "loss": 0.2179, "step": 10042 }, { "epoch": 0.01780884218589176, "grad_norm": 0.54296875, "learning_rate": 0.0019600643815957377, "loss": 0.1617, "step": 10044 }, { "epoch": 0.017812388351201573, "grad_norm": 0.341796875, "learning_rate": 0.0019600468467634237, "loss": 0.2976, "step": 10046 }, { "epoch": 0.017815934516511388, "grad_norm": 0.50390625, "learning_rate": 0.0019600293081697647, "loss": 0.2313, "step": 10048 }, { "epoch": 0.017819480681821206, "grad_norm": 0.27734375, "learning_rate": 0.001960011765814837, "loss": 0.2177, "step": 10050 }, { "epoch": 0.01782302684713102, "grad_norm": 0.345703125, "learning_rate": 0.0019599942196987176, "loss": 0.2227, "step": 10052 }, { "epoch": 0.017826573012440835, "grad_norm": 2.71875, "learning_rate": 0.001959976669821483, "loss": 0.4067, "step": 10054 }, { "epoch": 0.01783011917775065, "grad_norm": 1.4921875, "learning_rate": 0.0019599591161832096, "loss": 0.2393, "step": 10056 }, { "epoch": 0.017833665343060464, "grad_norm": 0.61328125, "learning_rate": 0.001959941558783975, "loss": 0.2976, "step": 10058 }, { "epoch": 0.01783721150837028, "grad_norm": 0.291015625, "learning_rate": 0.001959923997623855, "loss": 0.2025, "step": 10060 }, { "epoch": 0.017840757673680097, "grad_norm": 0.82421875, "learning_rate": 0.0019599064327029273, "loss": 0.2785, "step": 10062 }, { "epoch": 0.01784430383898991, "grad_norm": 0.8828125, "learning_rate": 0.0019598888640212685, "loss": 0.22, "step": 10064 }, { "epoch": 0.017847850004299726, "grad_norm": 0.29296875, "learning_rate": 0.001959871291578955, "loss": 0.2273, "step": 10066 }, { "epoch": 0.01785139616960954, "grad_norm": 0.31640625, "learning_rate": 0.001959853715376064, "loss": 0.1614, "step": 10068 }, { "epoch": 0.017854942334919355, "grad_norm": 0.49609375, "learning_rate": 0.001959836135412673, "loss": 0.2047, "step": 10070 }, { "epoch": 0.01785848850022917, "grad_norm": 1.2265625, "learning_rate": 0.001959818551688857, "loss": 0.3237, "step": 10072 }, { "epoch": 0.017862034665538987, "grad_norm": 0.375, "learning_rate": 0.0019598009642046946, "loss": 0.284, "step": 10074 }, { "epoch": 0.017865580830848802, "grad_norm": 0.69140625, "learning_rate": 0.001959783372960262, "loss": 0.2585, "step": 10076 }, { "epoch": 0.017869126996158616, "grad_norm": 0.64453125, "learning_rate": 0.001959765777955637, "loss": 0.2346, "step": 10078 }, { "epoch": 0.01787267316146843, "grad_norm": 0.8046875, "learning_rate": 0.0019597481791908955, "loss": 0.2129, "step": 10080 }, { "epoch": 0.017876219326778246, "grad_norm": 0.40234375, "learning_rate": 0.001959730576666115, "loss": 0.1958, "step": 10082 }, { "epoch": 0.017879765492088064, "grad_norm": 0.71484375, "learning_rate": 0.001959712970381372, "loss": 0.1828, "step": 10084 }, { "epoch": 0.017883311657397878, "grad_norm": 0.375, "learning_rate": 0.0019596953603367444, "loss": 0.2684, "step": 10086 }, { "epoch": 0.017886857822707693, "grad_norm": 1.4296875, "learning_rate": 0.001959677746532308, "loss": 0.2528, "step": 10088 }, { "epoch": 0.017890403988017507, "grad_norm": 0.41796875, "learning_rate": 0.0019596601289681406, "loss": 0.2253, "step": 10090 }, { "epoch": 0.017893950153327322, "grad_norm": 0.59765625, "learning_rate": 0.0019596425076443195, "loss": 0.2649, "step": 10092 }, { "epoch": 0.017897496318637136, "grad_norm": 0.37109375, "learning_rate": 0.0019596248825609216, "loss": 0.2368, "step": 10094 }, { "epoch": 0.017901042483946954, "grad_norm": 0.76953125, "learning_rate": 0.0019596072537180235, "loss": 0.22, "step": 10096 }, { "epoch": 0.01790458864925677, "grad_norm": 6.125, "learning_rate": 0.001959589621115702, "loss": 0.444, "step": 10098 }, { "epoch": 0.017908134814566583, "grad_norm": 1.984375, "learning_rate": 0.0019595719847540355, "loss": 0.3407, "step": 10100 }, { "epoch": 0.017911680979876398, "grad_norm": 0.62109375, "learning_rate": 0.0019595543446331008, "loss": 0.2183, "step": 10102 }, { "epoch": 0.017915227145186213, "grad_norm": 0.88671875, "learning_rate": 0.001959536700752974, "loss": 0.2608, "step": 10104 }, { "epoch": 0.017918773310496027, "grad_norm": 0.6875, "learning_rate": 0.0019595190531137333, "loss": 0.3895, "step": 10106 }, { "epoch": 0.017922319475805845, "grad_norm": 3.53125, "learning_rate": 0.001959501401715455, "loss": 0.3692, "step": 10108 }, { "epoch": 0.01792586564111566, "grad_norm": 0.6328125, "learning_rate": 0.0019594837465582173, "loss": 0.2468, "step": 10110 }, { "epoch": 0.017929411806425474, "grad_norm": 0.453125, "learning_rate": 0.001959466087642097, "loss": 0.2157, "step": 10112 }, { "epoch": 0.01793295797173529, "grad_norm": 1.3359375, "learning_rate": 0.001959448424967171, "loss": 0.2063, "step": 10114 }, { "epoch": 0.017936504137045103, "grad_norm": 0.3515625, "learning_rate": 0.001959430758533517, "loss": 0.2479, "step": 10116 }, { "epoch": 0.01794005030235492, "grad_norm": 0.83203125, "learning_rate": 0.001959413088341212, "loss": 0.2144, "step": 10118 }, { "epoch": 0.017943596467664736, "grad_norm": 0.55859375, "learning_rate": 0.0019593954143903333, "loss": 0.2429, "step": 10120 }, { "epoch": 0.01794714263297455, "grad_norm": 0.5078125, "learning_rate": 0.0019593777366809584, "loss": 0.2281, "step": 10122 }, { "epoch": 0.017950688798284365, "grad_norm": 3.78125, "learning_rate": 0.0019593600552131646, "loss": 0.1981, "step": 10124 }, { "epoch": 0.01795423496359418, "grad_norm": 1.890625, "learning_rate": 0.0019593423699870286, "loss": 0.3672, "step": 10126 }, { "epoch": 0.017957781128903994, "grad_norm": 0.3359375, "learning_rate": 0.0019593246810026286, "loss": 0.2159, "step": 10128 }, { "epoch": 0.017961327294213812, "grad_norm": 0.3046875, "learning_rate": 0.0019593069882600416, "loss": 0.2239, "step": 10130 }, { "epoch": 0.017964873459523627, "grad_norm": 0.3828125, "learning_rate": 0.001959289291759345, "loss": 0.2239, "step": 10132 }, { "epoch": 0.01796841962483344, "grad_norm": 2.21875, "learning_rate": 0.0019592715915006157, "loss": 0.2045, "step": 10134 }, { "epoch": 0.017971965790143256, "grad_norm": 0.375, "learning_rate": 0.0019592538874839325, "loss": 0.2109, "step": 10136 }, { "epoch": 0.01797551195545307, "grad_norm": 0.439453125, "learning_rate": 0.001959236179709371, "loss": 0.2654, "step": 10138 }, { "epoch": 0.017979058120762885, "grad_norm": 0.357421875, "learning_rate": 0.00195921846817701, "loss": 0.1866, "step": 10140 }, { "epoch": 0.017982604286072703, "grad_norm": 0.4765625, "learning_rate": 0.0019592007528869263, "loss": 0.2253, "step": 10142 }, { "epoch": 0.017986150451382518, "grad_norm": 3.5625, "learning_rate": 0.0019591830338391977, "loss": 0.3771, "step": 10144 }, { "epoch": 0.017989696616692332, "grad_norm": 0.431640625, "learning_rate": 0.001959165311033902, "loss": 0.2116, "step": 10146 }, { "epoch": 0.017993242782002147, "grad_norm": 0.341796875, "learning_rate": 0.0019591475844711157, "loss": 0.244, "step": 10148 }, { "epoch": 0.01799678894731196, "grad_norm": 0.208984375, "learning_rate": 0.001959129854150917, "loss": 0.2034, "step": 10150 }, { "epoch": 0.01800033511262178, "grad_norm": 0.447265625, "learning_rate": 0.001959112120073384, "loss": 0.2257, "step": 10152 }, { "epoch": 0.018003881277931594, "grad_norm": 0.482421875, "learning_rate": 0.0019590943822385925, "loss": 0.1996, "step": 10154 }, { "epoch": 0.01800742744324141, "grad_norm": 0.484375, "learning_rate": 0.001959076640646622, "loss": 0.2547, "step": 10156 }, { "epoch": 0.018010973608551223, "grad_norm": 0.34765625, "learning_rate": 0.001959058895297549, "loss": 0.1912, "step": 10158 }, { "epoch": 0.018014519773861037, "grad_norm": 0.431640625, "learning_rate": 0.0019590411461914516, "loss": 0.2222, "step": 10160 }, { "epoch": 0.018018065939170852, "grad_norm": 0.373046875, "learning_rate": 0.001959023393328407, "loss": 0.1483, "step": 10162 }, { "epoch": 0.01802161210448067, "grad_norm": 0.88671875, "learning_rate": 0.001959005636708493, "loss": 0.1871, "step": 10164 }, { "epoch": 0.018025158269790485, "grad_norm": 0.94140625, "learning_rate": 0.0019589878763317872, "loss": 0.3038, "step": 10166 }, { "epoch": 0.0180287044351003, "grad_norm": 0.64453125, "learning_rate": 0.001958970112198368, "loss": 0.2151, "step": 10168 }, { "epoch": 0.018032250600410114, "grad_norm": 0.75, "learning_rate": 0.0019589523443083122, "loss": 0.2968, "step": 10170 }, { "epoch": 0.018035796765719928, "grad_norm": 0.44140625, "learning_rate": 0.0019589345726616974, "loss": 0.2659, "step": 10172 }, { "epoch": 0.018039342931029743, "grad_norm": 1.6953125, "learning_rate": 0.0019589167972586022, "loss": 0.2288, "step": 10174 }, { "epoch": 0.01804288909633956, "grad_norm": 3.46875, "learning_rate": 0.001958899018099104, "loss": 0.2663, "step": 10176 }, { "epoch": 0.018046435261649375, "grad_norm": 0.8359375, "learning_rate": 0.0019588812351832795, "loss": 0.1911, "step": 10178 }, { "epoch": 0.01804998142695919, "grad_norm": 1.1015625, "learning_rate": 0.001958863448511208, "loss": 0.2843, "step": 10180 }, { "epoch": 0.018053527592269004, "grad_norm": 0.9140625, "learning_rate": 0.001958845658082967, "loss": 0.2965, "step": 10182 }, { "epoch": 0.01805707375757882, "grad_norm": 0.87109375, "learning_rate": 0.0019588278638986334, "loss": 0.1668, "step": 10184 }, { "epoch": 0.018060619922888637, "grad_norm": 1.03125, "learning_rate": 0.0019588100659582858, "loss": 0.4837, "step": 10186 }, { "epoch": 0.01806416608819845, "grad_norm": 0.451171875, "learning_rate": 0.0019587922642620016, "loss": 0.1943, "step": 10188 }, { "epoch": 0.018067712253508266, "grad_norm": 0.306640625, "learning_rate": 0.0019587744588098594, "loss": 0.1822, "step": 10190 }, { "epoch": 0.01807125841881808, "grad_norm": 0.3828125, "learning_rate": 0.001958756649601936, "loss": 0.169, "step": 10192 }, { "epoch": 0.018074804584127895, "grad_norm": 0.466796875, "learning_rate": 0.00195873883663831, "loss": 0.201, "step": 10194 }, { "epoch": 0.01807835074943771, "grad_norm": 1.3828125, "learning_rate": 0.0019587210199190595, "loss": 0.2239, "step": 10196 }, { "epoch": 0.018081896914747528, "grad_norm": 0.2275390625, "learning_rate": 0.0019587031994442615, "loss": 0.2852, "step": 10198 }, { "epoch": 0.018085443080057342, "grad_norm": 0.5625, "learning_rate": 0.0019586853752139952, "loss": 0.2208, "step": 10200 }, { "epoch": 0.018088989245367157, "grad_norm": 1.7265625, "learning_rate": 0.001958667547228337, "loss": 0.2622, "step": 10202 }, { "epoch": 0.01809253541067697, "grad_norm": 0.96875, "learning_rate": 0.001958649715487366, "loss": 0.176, "step": 10204 }, { "epoch": 0.018096081575986786, "grad_norm": 0.5859375, "learning_rate": 0.0019586318799911604, "loss": 0.2102, "step": 10206 }, { "epoch": 0.0180996277412966, "grad_norm": 0.80859375, "learning_rate": 0.0019586140407397974, "loss": 0.4113, "step": 10208 }, { "epoch": 0.01810317390660642, "grad_norm": 0.42578125, "learning_rate": 0.0019585961977333552, "loss": 0.245, "step": 10210 }, { "epoch": 0.018106720071916233, "grad_norm": 3.78125, "learning_rate": 0.001958578350971912, "loss": 0.3333, "step": 10212 }, { "epoch": 0.018110266237226048, "grad_norm": 0.6796875, "learning_rate": 0.001958560500455546, "loss": 0.2492, "step": 10214 }, { "epoch": 0.018113812402535862, "grad_norm": 0.453125, "learning_rate": 0.001958542646184335, "loss": 0.2149, "step": 10216 }, { "epoch": 0.018117358567845677, "grad_norm": 0.73828125, "learning_rate": 0.001958524788158357, "loss": 0.2814, "step": 10218 }, { "epoch": 0.018120904733155495, "grad_norm": 0.65234375, "learning_rate": 0.0019585069263776903, "loss": 0.277, "step": 10220 }, { "epoch": 0.01812445089846531, "grad_norm": 0.51171875, "learning_rate": 0.001958489060842413, "loss": 0.3089, "step": 10222 }, { "epoch": 0.018127997063775124, "grad_norm": 1.40625, "learning_rate": 0.0019584711915526035, "loss": 0.2348, "step": 10224 }, { "epoch": 0.01813154322908494, "grad_norm": 0.73828125, "learning_rate": 0.0019584533185083393, "loss": 0.2089, "step": 10226 }, { "epoch": 0.018135089394394753, "grad_norm": 0.63671875, "learning_rate": 0.0019584354417096993, "loss": 0.2093, "step": 10228 }, { "epoch": 0.018138635559704568, "grad_norm": 0.76953125, "learning_rate": 0.001958417561156761, "loss": 0.3352, "step": 10230 }, { "epoch": 0.018142181725014386, "grad_norm": 0.71484375, "learning_rate": 0.0019583996768496033, "loss": 0.2888, "step": 10232 }, { "epoch": 0.0181457278903242, "grad_norm": 1.0, "learning_rate": 0.0019583817887883037, "loss": 0.328, "step": 10234 }, { "epoch": 0.018149274055634015, "grad_norm": 0.255859375, "learning_rate": 0.0019583638969729407, "loss": 0.1834, "step": 10236 }, { "epoch": 0.01815282022094383, "grad_norm": 1.7265625, "learning_rate": 0.0019583460014035927, "loss": 0.2001, "step": 10238 }, { "epoch": 0.018156366386253644, "grad_norm": 0.326171875, "learning_rate": 0.0019583281020803377, "loss": 0.2567, "step": 10240 }, { "epoch": 0.01815991255156346, "grad_norm": 0.73828125, "learning_rate": 0.0019583101990032544, "loss": 0.2188, "step": 10242 }, { "epoch": 0.018163458716873276, "grad_norm": 1.8671875, "learning_rate": 0.0019582922921724207, "loss": 0.3756, "step": 10244 }, { "epoch": 0.01816700488218309, "grad_norm": 1.2265625, "learning_rate": 0.001958274381587915, "loss": 0.2334, "step": 10246 }, { "epoch": 0.018170551047492906, "grad_norm": 2.109375, "learning_rate": 0.0019582564672498163, "loss": 0.4077, "step": 10248 }, { "epoch": 0.01817409721280272, "grad_norm": 0.44140625, "learning_rate": 0.001958238549158202, "loss": 0.1877, "step": 10250 }, { "epoch": 0.018177643378112535, "grad_norm": 0.59375, "learning_rate": 0.0019582206273131507, "loss": 0.2476, "step": 10252 }, { "epoch": 0.018181189543422353, "grad_norm": 0.51171875, "learning_rate": 0.0019582027017147406, "loss": 0.176, "step": 10254 }, { "epoch": 0.018184735708732167, "grad_norm": 0.4375, "learning_rate": 0.001958184772363051, "loss": 0.36, "step": 10256 }, { "epoch": 0.018188281874041982, "grad_norm": 0.6171875, "learning_rate": 0.0019581668392581594, "loss": 0.2001, "step": 10258 }, { "epoch": 0.018191828039351796, "grad_norm": 0.353515625, "learning_rate": 0.0019581489024001445, "loss": 0.215, "step": 10260 }, { "epoch": 0.01819537420466161, "grad_norm": 1.6875, "learning_rate": 0.0019581309617890848, "loss": 0.2457, "step": 10262 }, { "epoch": 0.018198920369971425, "grad_norm": 0.3359375, "learning_rate": 0.001958113017425059, "loss": 0.2749, "step": 10264 }, { "epoch": 0.018202466535281243, "grad_norm": 1.0234375, "learning_rate": 0.001958095069308145, "loss": 0.235, "step": 10266 }, { "epoch": 0.018206012700591058, "grad_norm": 1.4921875, "learning_rate": 0.0019580771174384217, "loss": 0.2405, "step": 10268 }, { "epoch": 0.018209558865900873, "grad_norm": 0.216796875, "learning_rate": 0.001958059161815967, "loss": 0.2308, "step": 10270 }, { "epoch": 0.018213105031210687, "grad_norm": 0.474609375, "learning_rate": 0.0019580412024408607, "loss": 0.4021, "step": 10272 }, { "epoch": 0.0182166511965205, "grad_norm": 0.51171875, "learning_rate": 0.0019580232393131805, "loss": 0.1867, "step": 10274 }, { "epoch": 0.018220197361830316, "grad_norm": 0.28515625, "learning_rate": 0.001958005272433005, "loss": 0.2155, "step": 10276 }, { "epoch": 0.018223743527140134, "grad_norm": 0.21875, "learning_rate": 0.001957987301800413, "loss": 0.2168, "step": 10278 }, { "epoch": 0.01822728969244995, "grad_norm": 0.478515625, "learning_rate": 0.0019579693274154823, "loss": 0.1634, "step": 10280 }, { "epoch": 0.018230835857759763, "grad_norm": 0.54296875, "learning_rate": 0.0019579513492782924, "loss": 0.2428, "step": 10282 }, { "epoch": 0.018234382023069578, "grad_norm": 0.7578125, "learning_rate": 0.0019579333673889216, "loss": 0.2462, "step": 10284 }, { "epoch": 0.018237928188379392, "grad_norm": 0.51953125, "learning_rate": 0.0019579153817474485, "loss": 0.2854, "step": 10286 }, { "epoch": 0.01824147435368921, "grad_norm": 1.4609375, "learning_rate": 0.0019578973923539524, "loss": 0.368, "step": 10288 }, { "epoch": 0.018245020518999025, "grad_norm": 0.2734375, "learning_rate": 0.001957879399208511, "loss": 0.2404, "step": 10290 }, { "epoch": 0.01824856668430884, "grad_norm": 0.3046875, "learning_rate": 0.001957861402311204, "loss": 0.3172, "step": 10292 }, { "epoch": 0.018252112849618654, "grad_norm": 0.453125, "learning_rate": 0.0019578434016621084, "loss": 0.1834, "step": 10294 }, { "epoch": 0.01825565901492847, "grad_norm": 0.70703125, "learning_rate": 0.001957825397261305, "loss": 0.2276, "step": 10296 }, { "epoch": 0.018259205180238283, "grad_norm": 0.70703125, "learning_rate": 0.001957807389108871, "loss": 0.2495, "step": 10298 }, { "epoch": 0.0182627513455481, "grad_norm": 0.31640625, "learning_rate": 0.0019577893772048864, "loss": 0.244, "step": 10300 }, { "epoch": 0.018266297510857916, "grad_norm": 0.46484375, "learning_rate": 0.001957771361549429, "loss": 0.2325, "step": 10302 }, { "epoch": 0.01826984367616773, "grad_norm": 0.5625, "learning_rate": 0.0019577533421425777, "loss": 0.2792, "step": 10304 }, { "epoch": 0.018273389841477545, "grad_norm": 0.6328125, "learning_rate": 0.0019577353189844117, "loss": 0.2202, "step": 10306 }, { "epoch": 0.01827693600678736, "grad_norm": 0.546875, "learning_rate": 0.00195771729207501, "loss": 0.1957, "step": 10308 }, { "epoch": 0.018280482172097174, "grad_norm": 1.046875, "learning_rate": 0.0019576992614144507, "loss": 0.1921, "step": 10310 }, { "epoch": 0.018284028337406992, "grad_norm": 0.314453125, "learning_rate": 0.001957681227002813, "loss": 0.2769, "step": 10312 }, { "epoch": 0.018287574502716807, "grad_norm": 1.0234375, "learning_rate": 0.001957663188840176, "loss": 0.2295, "step": 10314 }, { "epoch": 0.01829112066802662, "grad_norm": 0.3984375, "learning_rate": 0.0019576451469266185, "loss": 0.2137, "step": 10316 }, { "epoch": 0.018294666833336436, "grad_norm": 0.55078125, "learning_rate": 0.001957627101262219, "loss": 0.4195, "step": 10318 }, { "epoch": 0.01829821299864625, "grad_norm": 0.57421875, "learning_rate": 0.001957609051847057, "loss": 0.3452, "step": 10320 }, { "epoch": 0.018301759163956068, "grad_norm": 0.3984375, "learning_rate": 0.001957590998681211, "loss": 0.3118, "step": 10322 }, { "epoch": 0.018305305329265883, "grad_norm": 0.9765625, "learning_rate": 0.0019575729417647602, "loss": 0.2397, "step": 10324 }, { "epoch": 0.018308851494575697, "grad_norm": 0.6015625, "learning_rate": 0.001957554881097783, "loss": 0.2115, "step": 10326 }, { "epoch": 0.018312397659885512, "grad_norm": 0.78125, "learning_rate": 0.0019575368166803594, "loss": 0.2911, "step": 10328 }, { "epoch": 0.018315943825195326, "grad_norm": 1.2734375, "learning_rate": 0.001957518748512568, "loss": 0.251, "step": 10330 }, { "epoch": 0.01831948999050514, "grad_norm": 0.5390625, "learning_rate": 0.0019575006765944875, "loss": 0.2515, "step": 10332 }, { "epoch": 0.01832303615581496, "grad_norm": 0.8828125, "learning_rate": 0.001957482600926197, "loss": 0.2356, "step": 10334 }, { "epoch": 0.018326582321124774, "grad_norm": 1.5859375, "learning_rate": 0.0019574645215077757, "loss": 0.2841, "step": 10336 }, { "epoch": 0.018330128486434588, "grad_norm": 0.373046875, "learning_rate": 0.001957446438339303, "loss": 0.2069, "step": 10338 }, { "epoch": 0.018333674651744403, "grad_norm": 0.349609375, "learning_rate": 0.0019574283514208575, "loss": 0.2113, "step": 10340 }, { "epoch": 0.018337220817054217, "grad_norm": 5.59375, "learning_rate": 0.001957410260752518, "loss": 0.2449, "step": 10342 }, { "epoch": 0.018340766982364032, "grad_norm": 0.515625, "learning_rate": 0.0019573921663343648, "loss": 0.1909, "step": 10344 }, { "epoch": 0.01834431314767385, "grad_norm": 0.63671875, "learning_rate": 0.0019573740681664755, "loss": 0.2399, "step": 10346 }, { "epoch": 0.018347859312983664, "grad_norm": 0.5703125, "learning_rate": 0.0019573559662489303, "loss": 0.2411, "step": 10348 }, { "epoch": 0.01835140547829348, "grad_norm": 2.9375, "learning_rate": 0.0019573378605818085, "loss": 0.4718, "step": 10350 }, { "epoch": 0.018354951643603293, "grad_norm": 0.26953125, "learning_rate": 0.001957319751165189, "loss": 0.2428, "step": 10352 }, { "epoch": 0.018358497808913108, "grad_norm": 2.59375, "learning_rate": 0.0019573016379991503, "loss": 0.215, "step": 10354 }, { "epoch": 0.018362043974222926, "grad_norm": 0.6875, "learning_rate": 0.0019572835210837727, "loss": 0.2647, "step": 10356 }, { "epoch": 0.01836559013953274, "grad_norm": 0.6015625, "learning_rate": 0.0019572654004191346, "loss": 0.2356, "step": 10358 }, { "epoch": 0.018369136304842555, "grad_norm": 0.5859375, "learning_rate": 0.001957247276005316, "loss": 0.1624, "step": 10360 }, { "epoch": 0.01837268247015237, "grad_norm": 0.72265625, "learning_rate": 0.0019572291478423954, "loss": 0.1786, "step": 10362 }, { "epoch": 0.018376228635462184, "grad_norm": 0.671875, "learning_rate": 0.001957211015930452, "loss": 0.2001, "step": 10364 }, { "epoch": 0.018379774800772, "grad_norm": 0.90625, "learning_rate": 0.001957192880269567, "loss": 0.3353, "step": 10366 }, { "epoch": 0.018383320966081817, "grad_norm": 0.5546875, "learning_rate": 0.001957174740859817, "loss": 0.1889, "step": 10368 }, { "epoch": 0.01838686713139163, "grad_norm": 1.1171875, "learning_rate": 0.001957156597701283, "loss": 0.3182, "step": 10370 }, { "epoch": 0.018390413296701446, "grad_norm": 0.53515625, "learning_rate": 0.0019571384507940438, "loss": 0.2011, "step": 10372 }, { "epoch": 0.01839395946201126, "grad_norm": 0.259765625, "learning_rate": 0.0019571203001381788, "loss": 0.215, "step": 10374 }, { "epoch": 0.018397505627321075, "grad_norm": 0.3984375, "learning_rate": 0.001957102145733768, "loss": 0.1834, "step": 10376 }, { "epoch": 0.01840105179263089, "grad_norm": 0.69140625, "learning_rate": 0.0019570839875808895, "loss": 0.5054, "step": 10378 }, { "epoch": 0.018404597957940708, "grad_norm": 0.32421875, "learning_rate": 0.001957065825679624, "loss": 0.1915, "step": 10380 }, { "epoch": 0.018408144123250522, "grad_norm": 0.82421875, "learning_rate": 0.0019570476600300505, "loss": 0.2373, "step": 10382 }, { "epoch": 0.018411690288560337, "grad_norm": 0.6171875, "learning_rate": 0.0019570294906322483, "loss": 0.2582, "step": 10384 }, { "epoch": 0.01841523645387015, "grad_norm": 0.388671875, "learning_rate": 0.0019570113174862966, "loss": 0.2506, "step": 10386 }, { "epoch": 0.018418782619179966, "grad_norm": 1.1640625, "learning_rate": 0.0019569931405922754, "loss": 0.2311, "step": 10388 }, { "epoch": 0.018422328784489784, "grad_norm": 0.7109375, "learning_rate": 0.0019569749599502645, "loss": 0.2086, "step": 10390 }, { "epoch": 0.0184258749497996, "grad_norm": 0.60546875, "learning_rate": 0.0019569567755603422, "loss": 0.2449, "step": 10392 }, { "epoch": 0.018429421115109413, "grad_norm": 0.3515625, "learning_rate": 0.001956938587422589, "loss": 0.1607, "step": 10394 }, { "epoch": 0.018432967280419228, "grad_norm": 0.65625, "learning_rate": 0.0019569203955370844, "loss": 0.2871, "step": 10396 }, { "epoch": 0.018436513445729042, "grad_norm": 1.3515625, "learning_rate": 0.001956902199903907, "loss": 0.2812, "step": 10398 }, { "epoch": 0.018440059611038857, "grad_norm": 0.484375, "learning_rate": 0.0019568840005231383, "loss": 0.223, "step": 10400 }, { "epoch": 0.018443605776348675, "grad_norm": 0.6796875, "learning_rate": 0.001956865797394856, "loss": 0.2565, "step": 10402 }, { "epoch": 0.01844715194165849, "grad_norm": 0.45703125, "learning_rate": 0.0019568475905191404, "loss": 0.2083, "step": 10404 }, { "epoch": 0.018450698106968304, "grad_norm": 0.59375, "learning_rate": 0.0019568293798960714, "loss": 0.2588, "step": 10406 }, { "epoch": 0.01845424427227812, "grad_norm": 0.75390625, "learning_rate": 0.0019568111655257282, "loss": 0.2578, "step": 10408 }, { "epoch": 0.018457790437587933, "grad_norm": 0.7890625, "learning_rate": 0.001956792947408191, "loss": 0.2195, "step": 10410 }, { "epoch": 0.018461336602897747, "grad_norm": 0.21484375, "learning_rate": 0.0019567747255435385, "loss": 0.2523, "step": 10412 }, { "epoch": 0.018464882768207565, "grad_norm": 0.546875, "learning_rate": 0.0019567564999318516, "loss": 0.2143, "step": 10414 }, { "epoch": 0.01846842893351738, "grad_norm": 0.353515625, "learning_rate": 0.001956738270573209, "loss": 0.248, "step": 10416 }, { "epoch": 0.018471975098827195, "grad_norm": 1.0703125, "learning_rate": 0.001956720037467691, "loss": 0.2287, "step": 10418 }, { "epoch": 0.01847552126413701, "grad_norm": 0.94921875, "learning_rate": 0.0019567018006153777, "loss": 0.2996, "step": 10420 }, { "epoch": 0.018479067429446824, "grad_norm": 0.8125, "learning_rate": 0.001956683560016348, "loss": 0.1991, "step": 10422 }, { "epoch": 0.01848261359475664, "grad_norm": 0.271484375, "learning_rate": 0.001956665315670682, "loss": 0.1997, "step": 10424 }, { "epoch": 0.018486159760066456, "grad_norm": 0.435546875, "learning_rate": 0.0019566470675784595, "loss": 0.2157, "step": 10426 }, { "epoch": 0.01848970592537627, "grad_norm": 0.765625, "learning_rate": 0.00195662881573976, "loss": 0.2223, "step": 10428 }, { "epoch": 0.018493252090686085, "grad_norm": 1.109375, "learning_rate": 0.001956610560154664, "loss": 0.1762, "step": 10430 }, { "epoch": 0.0184967982559959, "grad_norm": 0.6328125, "learning_rate": 0.001956592300823251, "loss": 0.2002, "step": 10432 }, { "epoch": 0.018500344421305714, "grad_norm": 0.62109375, "learning_rate": 0.001956574037745601, "loss": 0.2455, "step": 10434 }, { "epoch": 0.018503890586615532, "grad_norm": 0.494140625, "learning_rate": 0.0019565557709217934, "loss": 0.2304, "step": 10436 }, { "epoch": 0.018507436751925347, "grad_norm": 1.5625, "learning_rate": 0.0019565375003519087, "loss": 0.5566, "step": 10438 }, { "epoch": 0.01851098291723516, "grad_norm": 1.1640625, "learning_rate": 0.001956519226036026, "loss": 0.3678, "step": 10440 }, { "epoch": 0.018514529082544976, "grad_norm": 0.55859375, "learning_rate": 0.0019565009479742264, "loss": 0.2132, "step": 10442 }, { "epoch": 0.01851807524785479, "grad_norm": 0.546875, "learning_rate": 0.0019564826661665887, "loss": 0.2474, "step": 10444 }, { "epoch": 0.018521621413164605, "grad_norm": 0.435546875, "learning_rate": 0.0019564643806131935, "loss": 0.2052, "step": 10446 }, { "epoch": 0.018525167578474423, "grad_norm": 1.3046875, "learning_rate": 0.0019564460913141205, "loss": 0.2361, "step": 10448 }, { "epoch": 0.018528713743784238, "grad_norm": 0.3828125, "learning_rate": 0.0019564277982694494, "loss": 0.2359, "step": 10450 }, { "epoch": 0.018532259909094052, "grad_norm": 0.65625, "learning_rate": 0.0019564095014792614, "loss": 0.2016, "step": 10452 }, { "epoch": 0.018535806074403867, "grad_norm": 0.65234375, "learning_rate": 0.0019563912009436355, "loss": 0.3994, "step": 10454 }, { "epoch": 0.01853935223971368, "grad_norm": 0.6015625, "learning_rate": 0.0019563728966626517, "loss": 0.2731, "step": 10456 }, { "epoch": 0.0185428984050235, "grad_norm": 0.2490234375, "learning_rate": 0.00195635458863639, "loss": 0.2361, "step": 10458 }, { "epoch": 0.018546444570333314, "grad_norm": 0.52734375, "learning_rate": 0.0019563362768649315, "loss": 0.2998, "step": 10460 }, { "epoch": 0.01854999073564313, "grad_norm": 0.6015625, "learning_rate": 0.001956317961348355, "loss": 0.2047, "step": 10462 }, { "epoch": 0.018553536900952943, "grad_norm": 2.0625, "learning_rate": 0.0019562996420867413, "loss": 0.3433, "step": 10464 }, { "epoch": 0.018557083066262758, "grad_norm": 0.61328125, "learning_rate": 0.0019562813190801705, "loss": 0.1959, "step": 10466 }, { "epoch": 0.018560629231572572, "grad_norm": 0.478515625, "learning_rate": 0.001956262992328722, "loss": 0.2983, "step": 10468 }, { "epoch": 0.01856417539688239, "grad_norm": 0.2890625, "learning_rate": 0.001956244661832477, "loss": 0.1771, "step": 10470 }, { "epoch": 0.018567721562192205, "grad_norm": 0.365234375, "learning_rate": 0.0019562263275915157, "loss": 0.2537, "step": 10472 }, { "epoch": 0.01857126772750202, "grad_norm": 0.53515625, "learning_rate": 0.001956207989605917, "loss": 0.1784, "step": 10474 }, { "epoch": 0.018574813892811834, "grad_norm": 0.5859375, "learning_rate": 0.0019561896478757623, "loss": 0.1929, "step": 10476 }, { "epoch": 0.01857836005812165, "grad_norm": 1.8515625, "learning_rate": 0.0019561713024011315, "loss": 0.3238, "step": 10478 }, { "epoch": 0.018581906223431463, "grad_norm": 0.3203125, "learning_rate": 0.0019561529531821045, "loss": 0.2495, "step": 10480 }, { "epoch": 0.01858545238874128, "grad_norm": 0.41015625, "learning_rate": 0.001956134600218762, "loss": 0.202, "step": 10482 }, { "epoch": 0.018588998554051096, "grad_norm": 0.7578125, "learning_rate": 0.001956116243511184, "loss": 0.208, "step": 10484 }, { "epoch": 0.01859254471936091, "grad_norm": 0.80078125, "learning_rate": 0.0019560978830594506, "loss": 0.2025, "step": 10486 }, { "epoch": 0.018596090884670725, "grad_norm": 1.53125, "learning_rate": 0.0019560795188636425, "loss": 0.2007, "step": 10488 }, { "epoch": 0.01859963704998054, "grad_norm": 0.44921875, "learning_rate": 0.0019560611509238397, "loss": 0.2096, "step": 10490 }, { "epoch": 0.018603183215290357, "grad_norm": 1.5546875, "learning_rate": 0.001956042779240123, "loss": 0.2777, "step": 10492 }, { "epoch": 0.018606729380600172, "grad_norm": 0.58984375, "learning_rate": 0.001956024403812572, "loss": 0.3095, "step": 10494 }, { "epoch": 0.018610275545909986, "grad_norm": 1.296875, "learning_rate": 0.001956006024641268, "loss": 0.41, "step": 10496 }, { "epoch": 0.0186138217112198, "grad_norm": 0.5859375, "learning_rate": 0.0019559876417262908, "loss": 0.1939, "step": 10498 }, { "epoch": 0.018617367876529616, "grad_norm": 2.125, "learning_rate": 0.0019559692550677205, "loss": 0.2805, "step": 10500 }, { "epoch": 0.01862091404183943, "grad_norm": 0.5859375, "learning_rate": 0.0019559508646656383, "loss": 0.2347, "step": 10502 }, { "epoch": 0.018624460207149248, "grad_norm": 0.83203125, "learning_rate": 0.0019559324705201242, "loss": 0.2676, "step": 10504 }, { "epoch": 0.018628006372459063, "grad_norm": 0.69140625, "learning_rate": 0.001955914072631258, "loss": 0.2571, "step": 10506 }, { "epoch": 0.018631552537768877, "grad_norm": 0.248046875, "learning_rate": 0.0019558956709991217, "loss": 0.1935, "step": 10508 }, { "epoch": 0.01863509870307869, "grad_norm": 0.478515625, "learning_rate": 0.0019558772656237946, "loss": 0.2045, "step": 10510 }, { "epoch": 0.018638644868388506, "grad_norm": 0.3203125, "learning_rate": 0.001955858856505357, "loss": 0.1858, "step": 10512 }, { "epoch": 0.01864219103369832, "grad_norm": 0.44140625, "learning_rate": 0.0019558404436438906, "loss": 0.1759, "step": 10514 }, { "epoch": 0.01864573719900814, "grad_norm": 0.322265625, "learning_rate": 0.0019558220270394747, "loss": 0.2053, "step": 10516 }, { "epoch": 0.018649283364317953, "grad_norm": 0.3125, "learning_rate": 0.0019558036066921907, "loss": 0.2209, "step": 10518 }, { "epoch": 0.018652829529627768, "grad_norm": 0.90234375, "learning_rate": 0.0019557851826021186, "loss": 0.2606, "step": 10520 }, { "epoch": 0.018656375694937583, "grad_norm": 0.40625, "learning_rate": 0.0019557667547693393, "loss": 0.2002, "step": 10522 }, { "epoch": 0.018659921860247397, "grad_norm": 0.3359375, "learning_rate": 0.0019557483231939336, "loss": 0.1602, "step": 10524 }, { "epoch": 0.018663468025557215, "grad_norm": 0.375, "learning_rate": 0.0019557298878759813, "loss": 0.2976, "step": 10526 }, { "epoch": 0.01866701419086703, "grad_norm": 0.68359375, "learning_rate": 0.001955711448815564, "loss": 0.2863, "step": 10528 }, { "epoch": 0.018670560356176844, "grad_norm": 8.375, "learning_rate": 0.001955693006012762, "loss": 0.3923, "step": 10530 }, { "epoch": 0.01867410652148666, "grad_norm": 0.515625, "learning_rate": 0.001955674559467655, "loss": 0.1933, "step": 10532 }, { "epoch": 0.018677652686796473, "grad_norm": 0.77734375, "learning_rate": 0.0019556561091803254, "loss": 0.1943, "step": 10534 }, { "epoch": 0.018681198852106288, "grad_norm": 0.392578125, "learning_rate": 0.0019556376551508525, "loss": 0.1805, "step": 10536 }, { "epoch": 0.018684745017416106, "grad_norm": 0.83984375, "learning_rate": 0.0019556191973793178, "loss": 0.2526, "step": 10538 }, { "epoch": 0.01868829118272592, "grad_norm": 1.328125, "learning_rate": 0.0019556007358658015, "loss": 0.2645, "step": 10540 }, { "epoch": 0.018691837348035735, "grad_norm": 0.3828125, "learning_rate": 0.0019555822706103843, "loss": 0.2595, "step": 10542 }, { "epoch": 0.01869538351334555, "grad_norm": 0.4375, "learning_rate": 0.001955563801613148, "loss": 0.2784, "step": 10544 }, { "epoch": 0.018698929678655364, "grad_norm": 4.9375, "learning_rate": 0.001955545328874172, "loss": 0.388, "step": 10546 }, { "epoch": 0.01870247584396518, "grad_norm": 0.55859375, "learning_rate": 0.001955526852393538, "loss": 0.2246, "step": 10548 }, { "epoch": 0.018706022009274997, "grad_norm": 0.439453125, "learning_rate": 0.001955508372171326, "loss": 0.258, "step": 10550 }, { "epoch": 0.01870956817458481, "grad_norm": 1.265625, "learning_rate": 0.001955489888207618, "loss": 0.4772, "step": 10552 }, { "epoch": 0.018713114339894626, "grad_norm": 1.03125, "learning_rate": 0.001955471400502494, "loss": 0.207, "step": 10554 }, { "epoch": 0.01871666050520444, "grad_norm": 0.84765625, "learning_rate": 0.0019554529090560348, "loss": 0.2586, "step": 10556 }, { "epoch": 0.018720206670514255, "grad_norm": 0.56640625, "learning_rate": 0.0019554344138683214, "loss": 0.2212, "step": 10558 }, { "epoch": 0.018723752835824073, "grad_norm": 1.875, "learning_rate": 0.001955415914939435, "loss": 0.3548, "step": 10560 }, { "epoch": 0.018727299001133887, "grad_norm": 0.435546875, "learning_rate": 0.001955397412269456, "loss": 0.1997, "step": 10562 }, { "epoch": 0.018730845166443702, "grad_norm": 1.125, "learning_rate": 0.0019553789058584657, "loss": 0.2845, "step": 10564 }, { "epoch": 0.018734391331753517, "grad_norm": 0.9453125, "learning_rate": 0.0019553603957065454, "loss": 0.1796, "step": 10566 }, { "epoch": 0.01873793749706333, "grad_norm": 0.2373046875, "learning_rate": 0.001955341881813775, "loss": 0.247, "step": 10568 }, { "epoch": 0.018741483662373146, "grad_norm": 0.349609375, "learning_rate": 0.0019553233641802364, "loss": 0.2784, "step": 10570 }, { "epoch": 0.018745029827682964, "grad_norm": 0.490234375, "learning_rate": 0.00195530484280601, "loss": 0.1938, "step": 10572 }, { "epoch": 0.018748575992992778, "grad_norm": 0.62109375, "learning_rate": 0.001955286317691177, "loss": 0.2325, "step": 10574 }, { "epoch": 0.018752122158302593, "grad_norm": 0.486328125, "learning_rate": 0.0019552677888358184, "loss": 0.277, "step": 10576 }, { "epoch": 0.018755668323612407, "grad_norm": 0.439453125, "learning_rate": 0.001955249256240016, "loss": 0.1885, "step": 10578 }, { "epoch": 0.018759214488922222, "grad_norm": 0.85546875, "learning_rate": 0.0019552307199038493, "loss": 0.2588, "step": 10580 }, { "epoch": 0.018762760654232036, "grad_norm": 0.458984375, "learning_rate": 0.0019552121798274004, "loss": 0.1862, "step": 10582 }, { "epoch": 0.018766306819541854, "grad_norm": 1.671875, "learning_rate": 0.00195519363601075, "loss": 0.2548, "step": 10584 }, { "epoch": 0.01876985298485167, "grad_norm": 0.345703125, "learning_rate": 0.00195517508845398, "loss": 0.2568, "step": 10586 }, { "epoch": 0.018773399150161484, "grad_norm": 0.236328125, "learning_rate": 0.0019551565371571707, "loss": 0.2379, "step": 10588 }, { "epoch": 0.018776945315471298, "grad_norm": 0.455078125, "learning_rate": 0.0019551379821204033, "loss": 0.2563, "step": 10590 }, { "epoch": 0.018780491480781113, "grad_norm": 0.8828125, "learning_rate": 0.001955119423343759, "loss": 0.2551, "step": 10592 }, { "epoch": 0.01878403764609093, "grad_norm": 0.359375, "learning_rate": 0.001955100860827319, "loss": 0.22, "step": 10594 }, { "epoch": 0.018787583811400745, "grad_norm": 0.76953125, "learning_rate": 0.001955082294571165, "loss": 0.2162, "step": 10596 }, { "epoch": 0.01879112997671056, "grad_norm": 0.515625, "learning_rate": 0.0019550637245753775, "loss": 0.3297, "step": 10598 }, { "epoch": 0.018794676142020374, "grad_norm": 1.0390625, "learning_rate": 0.001955045150840038, "loss": 0.2616, "step": 10600 }, { "epoch": 0.01879822230733019, "grad_norm": 0.6796875, "learning_rate": 0.0019550265733652276, "loss": 0.2574, "step": 10602 }, { "epoch": 0.018801768472640003, "grad_norm": 0.83203125, "learning_rate": 0.0019550079921510275, "loss": 0.2556, "step": 10604 }, { "epoch": 0.01880531463794982, "grad_norm": 0.306640625, "learning_rate": 0.001954989407197519, "loss": 0.2032, "step": 10606 }, { "epoch": 0.018808860803259636, "grad_norm": 1.4375, "learning_rate": 0.001954970818504784, "loss": 0.2785, "step": 10608 }, { "epoch": 0.01881240696856945, "grad_norm": 0.92578125, "learning_rate": 0.001954952226072903, "loss": 0.2119, "step": 10610 }, { "epoch": 0.018815953133879265, "grad_norm": 0.376953125, "learning_rate": 0.0019549336299019573, "loss": 0.2636, "step": 10612 }, { "epoch": 0.01881949929918908, "grad_norm": 0.310546875, "learning_rate": 0.0019549150299920286, "loss": 0.3351, "step": 10614 }, { "epoch": 0.018823045464498894, "grad_norm": 0.57421875, "learning_rate": 0.0019548964263431984, "loss": 0.1953, "step": 10616 }, { "epoch": 0.018826591629808712, "grad_norm": 0.41796875, "learning_rate": 0.0019548778189555477, "loss": 0.2252, "step": 10618 }, { "epoch": 0.018830137795118527, "grad_norm": 1.4453125, "learning_rate": 0.001954859207829158, "loss": 0.2535, "step": 10620 }, { "epoch": 0.01883368396042834, "grad_norm": 0.46484375, "learning_rate": 0.0019548405929641108, "loss": 0.2568, "step": 10622 }, { "epoch": 0.018837230125738156, "grad_norm": 0.76171875, "learning_rate": 0.001954821974360487, "loss": 0.2194, "step": 10624 }, { "epoch": 0.01884077629104797, "grad_norm": 3.609375, "learning_rate": 0.0019548033520183686, "loss": 0.3385, "step": 10626 }, { "epoch": 0.01884432245635779, "grad_norm": 0.64453125, "learning_rate": 0.001954784725937837, "loss": 0.1935, "step": 10628 }, { "epoch": 0.018847868621667603, "grad_norm": 0.625, "learning_rate": 0.0019547660961189736, "loss": 0.344, "step": 10630 }, { "epoch": 0.018851414786977418, "grad_norm": 0.31640625, "learning_rate": 0.0019547474625618596, "loss": 0.1781, "step": 10632 }, { "epoch": 0.018854960952287232, "grad_norm": 0.7578125, "learning_rate": 0.0019547288252665766, "loss": 0.2042, "step": 10634 }, { "epoch": 0.018858507117597047, "grad_norm": 0.515625, "learning_rate": 0.0019547101842332065, "loss": 0.2229, "step": 10636 }, { "epoch": 0.01886205328290686, "grad_norm": 0.59375, "learning_rate": 0.0019546915394618304, "loss": 0.2343, "step": 10638 }, { "epoch": 0.01886559944821668, "grad_norm": 1.8046875, "learning_rate": 0.0019546728909525302, "loss": 0.4387, "step": 10640 }, { "epoch": 0.018869145613526494, "grad_norm": 0.365234375, "learning_rate": 0.0019546542387053867, "loss": 0.1795, "step": 10642 }, { "epoch": 0.01887269177883631, "grad_norm": 0.60546875, "learning_rate": 0.0019546355827204827, "loss": 0.2605, "step": 10644 }, { "epoch": 0.018876237944146123, "grad_norm": 1.03125, "learning_rate": 0.0019546169229978983, "loss": 0.3554, "step": 10646 }, { "epoch": 0.018879784109455938, "grad_norm": 0.68359375, "learning_rate": 0.0019545982595377165, "loss": 0.2676, "step": 10648 }, { "epoch": 0.018883330274765752, "grad_norm": 0.72265625, "learning_rate": 0.001954579592340018, "loss": 0.2549, "step": 10650 }, { "epoch": 0.01888687644007557, "grad_norm": 1.9453125, "learning_rate": 0.001954560921404885, "loss": 0.2426, "step": 10652 }, { "epoch": 0.018890422605385385, "grad_norm": 0.96484375, "learning_rate": 0.001954542246732399, "loss": 0.303, "step": 10654 }, { "epoch": 0.0188939687706952, "grad_norm": 0.5078125, "learning_rate": 0.0019545235683226412, "loss": 0.2656, "step": 10656 }, { "epoch": 0.018897514936005014, "grad_norm": 0.3046875, "learning_rate": 0.0019545048861756937, "loss": 0.2267, "step": 10658 }, { "epoch": 0.01890106110131483, "grad_norm": 0.7421875, "learning_rate": 0.0019544862002916384, "loss": 0.3153, "step": 10660 }, { "epoch": 0.018904607266624646, "grad_norm": 0.31640625, "learning_rate": 0.001954467510670557, "loss": 0.1953, "step": 10662 }, { "epoch": 0.01890815343193446, "grad_norm": 0.953125, "learning_rate": 0.0019544488173125307, "loss": 0.291, "step": 10664 }, { "epoch": 0.018911699597244275, "grad_norm": 0.244140625, "learning_rate": 0.001954430120217642, "loss": 0.283, "step": 10666 }, { "epoch": 0.01891524576255409, "grad_norm": 0.65234375, "learning_rate": 0.0019544114193859713, "loss": 0.2134, "step": 10668 }, { "epoch": 0.018918791927863905, "grad_norm": 0.51953125, "learning_rate": 0.0019543927148176024, "loss": 0.2538, "step": 10670 }, { "epoch": 0.01892233809317372, "grad_norm": 0.4609375, "learning_rate": 0.0019543740065126156, "loss": 0.2825, "step": 10672 }, { "epoch": 0.018925884258483537, "grad_norm": 0.51953125, "learning_rate": 0.001954355294471093, "loss": 0.2298, "step": 10674 }, { "epoch": 0.01892943042379335, "grad_norm": 0.8984375, "learning_rate": 0.0019543365786931174, "loss": 0.2322, "step": 10676 }, { "epoch": 0.018932976589103166, "grad_norm": 0.546875, "learning_rate": 0.001954317859178769, "loss": 0.2352, "step": 10678 }, { "epoch": 0.01893652275441298, "grad_norm": 0.8828125, "learning_rate": 0.0019542991359281312, "loss": 0.2686, "step": 10680 }, { "epoch": 0.018940068919722795, "grad_norm": 0.36328125, "learning_rate": 0.0019542804089412846, "loss": 0.2246, "step": 10682 }, { "epoch": 0.01894361508503261, "grad_norm": 0.4609375, "learning_rate": 0.001954261678218312, "loss": 0.2252, "step": 10684 }, { "epoch": 0.018947161250342428, "grad_norm": 0.353515625, "learning_rate": 0.001954242943759295, "loss": 0.2093, "step": 10686 }, { "epoch": 0.018950707415652242, "grad_norm": 0.29296875, "learning_rate": 0.0019542242055643156, "loss": 0.2244, "step": 10688 }, { "epoch": 0.018954253580962057, "grad_norm": 0.75, "learning_rate": 0.001954205463633456, "loss": 0.2106, "step": 10690 }, { "epoch": 0.01895779974627187, "grad_norm": 0.5234375, "learning_rate": 0.0019541867179667972, "loss": 0.24, "step": 10692 }, { "epoch": 0.018961345911581686, "grad_norm": 1.046875, "learning_rate": 0.0019541679685644224, "loss": 0.2106, "step": 10694 }, { "epoch": 0.018964892076891504, "grad_norm": 1.3515625, "learning_rate": 0.001954149215426413, "loss": 0.2169, "step": 10696 }, { "epoch": 0.01896843824220132, "grad_norm": 0.60546875, "learning_rate": 0.0019541304585528507, "loss": 0.2906, "step": 10698 }, { "epoch": 0.018971984407511133, "grad_norm": 1.6953125, "learning_rate": 0.0019541116979438183, "loss": 0.2548, "step": 10700 }, { "epoch": 0.018975530572820948, "grad_norm": 0.48046875, "learning_rate": 0.001954092933599397, "loss": 0.1975, "step": 10702 }, { "epoch": 0.018979076738130762, "grad_norm": 1.265625, "learning_rate": 0.00195407416551967, "loss": 0.2005, "step": 10704 }, { "epoch": 0.018982622903440577, "grad_norm": 0.6015625, "learning_rate": 0.0019540553937047187, "loss": 0.3157, "step": 10706 }, { "epoch": 0.018986169068750395, "grad_norm": 0.482421875, "learning_rate": 0.0019540366181546244, "loss": 0.2572, "step": 10708 }, { "epoch": 0.01898971523406021, "grad_norm": 1.0, "learning_rate": 0.001954017838869471, "loss": 0.2349, "step": 10710 }, { "epoch": 0.018993261399370024, "grad_norm": 1.578125, "learning_rate": 0.001953999055849339, "loss": 0.2347, "step": 10712 }, { "epoch": 0.01899680756467984, "grad_norm": 0.5390625, "learning_rate": 0.001953980269094311, "loss": 0.2762, "step": 10714 }, { "epoch": 0.019000353729989653, "grad_norm": 1.7890625, "learning_rate": 0.00195396147860447, "loss": 0.2047, "step": 10716 }, { "epoch": 0.019003899895299468, "grad_norm": 0.390625, "learning_rate": 0.001953942684379897, "loss": 0.2115, "step": 10718 }, { "epoch": 0.019007446060609286, "grad_norm": 2.1875, "learning_rate": 0.0019539238864206753, "loss": 0.2352, "step": 10720 }, { "epoch": 0.0190109922259191, "grad_norm": 0.45703125, "learning_rate": 0.0019539050847268862, "loss": 0.1856, "step": 10722 }, { "epoch": 0.019014538391228915, "grad_norm": 1.6796875, "learning_rate": 0.001953886279298612, "loss": 0.2969, "step": 10724 }, { "epoch": 0.01901808455653873, "grad_norm": 0.828125, "learning_rate": 0.001953867470135936, "loss": 0.2008, "step": 10726 }, { "epoch": 0.019021630721848544, "grad_norm": 1.5703125, "learning_rate": 0.001953848657238939, "loss": 0.2502, "step": 10728 }, { "epoch": 0.019025176887158362, "grad_norm": 0.47265625, "learning_rate": 0.001953829840607704, "loss": 0.3565, "step": 10730 }, { "epoch": 0.019028723052468176, "grad_norm": 0.52734375, "learning_rate": 0.001953811020242313, "loss": 0.2566, "step": 10732 }, { "epoch": 0.01903226921777799, "grad_norm": 3.265625, "learning_rate": 0.001953792196142849, "loss": 0.3169, "step": 10734 }, { "epoch": 0.019035815383087806, "grad_norm": 0.95703125, "learning_rate": 0.001953773368309394, "loss": 0.2331, "step": 10736 }, { "epoch": 0.01903936154839762, "grad_norm": 0.61328125, "learning_rate": 0.00195375453674203, "loss": 0.1891, "step": 10738 }, { "epoch": 0.019042907713707435, "grad_norm": 3.265625, "learning_rate": 0.0019537357014408393, "loss": 0.2587, "step": 10740 }, { "epoch": 0.019046453879017253, "grad_norm": 0.486328125, "learning_rate": 0.001953716862405905, "loss": 0.2695, "step": 10742 }, { "epoch": 0.019050000044327067, "grad_norm": 0.302734375, "learning_rate": 0.001953698019637309, "loss": 0.3266, "step": 10744 }, { "epoch": 0.019053546209636882, "grad_norm": 1.140625, "learning_rate": 0.0019536791731351337, "loss": 0.2231, "step": 10746 }, { "epoch": 0.019057092374946696, "grad_norm": 0.796875, "learning_rate": 0.0019536603228994614, "loss": 0.296, "step": 10748 }, { "epoch": 0.01906063854025651, "grad_norm": 0.640625, "learning_rate": 0.0019536414689303745, "loss": 0.2146, "step": 10750 }, { "epoch": 0.019064184705566325, "grad_norm": 1.0625, "learning_rate": 0.001953622611227956, "loss": 0.3058, "step": 10752 }, { "epoch": 0.019067730870876144, "grad_norm": 0.30859375, "learning_rate": 0.001953603749792288, "loss": 0.3183, "step": 10754 }, { "epoch": 0.019071277036185958, "grad_norm": 0.310546875, "learning_rate": 0.001953584884623453, "loss": 0.199, "step": 10756 }, { "epoch": 0.019074823201495773, "grad_norm": 0.6640625, "learning_rate": 0.0019535660157215338, "loss": 0.2279, "step": 10758 }, { "epoch": 0.019078369366805587, "grad_norm": 0.359375, "learning_rate": 0.0019535471430866124, "loss": 0.2577, "step": 10760 }, { "epoch": 0.0190819155321154, "grad_norm": 0.3828125, "learning_rate": 0.0019535282667187716, "loss": 0.2329, "step": 10762 }, { "epoch": 0.01908546169742522, "grad_norm": 0.9375, "learning_rate": 0.001953509386618094, "loss": 0.2759, "step": 10764 }, { "epoch": 0.019089007862735034, "grad_norm": 1.2421875, "learning_rate": 0.001953490502784662, "loss": 0.2235, "step": 10766 }, { "epoch": 0.01909255402804485, "grad_norm": 0.490234375, "learning_rate": 0.0019534716152185584, "loss": 0.3044, "step": 10768 }, { "epoch": 0.019096100193354663, "grad_norm": 0.86328125, "learning_rate": 0.0019534527239198655, "loss": 0.3898, "step": 10770 }, { "epoch": 0.019099646358664478, "grad_norm": 0.59375, "learning_rate": 0.0019534338288886666, "loss": 0.2063, "step": 10772 }, { "epoch": 0.019103192523974293, "grad_norm": 1.5859375, "learning_rate": 0.0019534149301250435, "loss": 0.2357, "step": 10774 }, { "epoch": 0.01910673868928411, "grad_norm": 0.45703125, "learning_rate": 0.0019533960276290796, "loss": 0.2266, "step": 10776 }, { "epoch": 0.019110284854593925, "grad_norm": 0.40234375, "learning_rate": 0.0019533771214008564, "loss": 0.1959, "step": 10778 }, { "epoch": 0.01911383101990374, "grad_norm": 0.376953125, "learning_rate": 0.001953358211440458, "loss": 0.2363, "step": 10780 }, { "epoch": 0.019117377185213554, "grad_norm": 0.3203125, "learning_rate": 0.0019533392977479666, "loss": 0.191, "step": 10782 }, { "epoch": 0.01912092335052337, "grad_norm": 0.5546875, "learning_rate": 0.0019533203803234643, "loss": 0.2211, "step": 10784 }, { "epoch": 0.019124469515833183, "grad_norm": 0.6640625, "learning_rate": 0.0019533014591670344, "loss": 0.2336, "step": 10786 }, { "epoch": 0.019128015681143, "grad_norm": 1.125, "learning_rate": 0.0019532825342787603, "loss": 0.2508, "step": 10788 }, { "epoch": 0.019131561846452816, "grad_norm": 0.65625, "learning_rate": 0.001953263605658723, "loss": 0.2136, "step": 10790 }, { "epoch": 0.01913510801176263, "grad_norm": 4.5, "learning_rate": 0.001953244673307007, "loss": 0.301, "step": 10792 }, { "epoch": 0.019138654177072445, "grad_norm": 0.39453125, "learning_rate": 0.0019532257372236943, "loss": 0.2218, "step": 10794 }, { "epoch": 0.01914220034238226, "grad_norm": 0.462890625, "learning_rate": 0.001953206797408868, "loss": 0.2472, "step": 10796 }, { "epoch": 0.019145746507692078, "grad_norm": 0.66796875, "learning_rate": 0.0019531878538626103, "loss": 0.2439, "step": 10798 }, { "epoch": 0.019149292673001892, "grad_norm": 1.828125, "learning_rate": 0.001953168906585005, "loss": 0.2473, "step": 10800 }, { "epoch": 0.019152838838311707, "grad_norm": 0.357421875, "learning_rate": 0.001953149955576134, "loss": 0.1757, "step": 10802 }, { "epoch": 0.01915638500362152, "grad_norm": 0.6015625, "learning_rate": 0.0019531310008360806, "loss": 0.2036, "step": 10804 }, { "epoch": 0.019159931168931336, "grad_norm": 2.375, "learning_rate": 0.0019531120423649283, "loss": 0.2246, "step": 10806 }, { "epoch": 0.01916347733424115, "grad_norm": 0.71484375, "learning_rate": 0.0019530930801627594, "loss": 0.1578, "step": 10808 }, { "epoch": 0.01916702349955097, "grad_norm": 0.44921875, "learning_rate": 0.0019530741142296565, "loss": 0.2797, "step": 10810 }, { "epoch": 0.019170569664860783, "grad_norm": 0.55078125, "learning_rate": 0.0019530551445657031, "loss": 0.2009, "step": 10812 }, { "epoch": 0.019174115830170597, "grad_norm": 0.6640625, "learning_rate": 0.0019530361711709823, "loss": 0.3717, "step": 10814 }, { "epoch": 0.019177661995480412, "grad_norm": 1.90625, "learning_rate": 0.0019530171940455763, "loss": 0.3369, "step": 10816 }, { "epoch": 0.019181208160790227, "grad_norm": 0.671875, "learning_rate": 0.0019529982131895691, "loss": 0.2041, "step": 10818 }, { "epoch": 0.01918475432610004, "grad_norm": 0.318359375, "learning_rate": 0.001952979228603043, "loss": 0.2526, "step": 10820 }, { "epoch": 0.01918830049140986, "grad_norm": 0.453125, "learning_rate": 0.001952960240286081, "loss": 0.2245, "step": 10822 }, { "epoch": 0.019191846656719674, "grad_norm": 1.3359375, "learning_rate": 0.0019529412482387667, "loss": 0.2152, "step": 10824 }, { "epoch": 0.019195392822029488, "grad_norm": 0.64453125, "learning_rate": 0.0019529222524611825, "loss": 0.2138, "step": 10826 }, { "epoch": 0.019198938987339303, "grad_norm": 0.279296875, "learning_rate": 0.0019529032529534123, "loss": 0.2304, "step": 10828 }, { "epoch": 0.019202485152649117, "grad_norm": 0.98828125, "learning_rate": 0.0019528842497155382, "loss": 0.2946, "step": 10830 }, { "epoch": 0.019206031317958935, "grad_norm": 0.4375, "learning_rate": 0.0019528652427476438, "loss": 0.3555, "step": 10832 }, { "epoch": 0.01920957748326875, "grad_norm": 0.314453125, "learning_rate": 0.0019528462320498126, "loss": 0.4899, "step": 10834 }, { "epoch": 0.019213123648578564, "grad_norm": 1.8671875, "learning_rate": 0.0019528272176221272, "loss": 0.3046, "step": 10836 }, { "epoch": 0.01921666981388838, "grad_norm": 2.109375, "learning_rate": 0.001952808199464671, "loss": 0.2421, "step": 10838 }, { "epoch": 0.019220215979198194, "grad_norm": 0.49609375, "learning_rate": 0.0019527891775775268, "loss": 0.4896, "step": 10840 }, { "epoch": 0.019223762144508008, "grad_norm": 0.953125, "learning_rate": 0.0019527701519607783, "loss": 0.3056, "step": 10842 }, { "epoch": 0.019227308309817826, "grad_norm": 0.98046875, "learning_rate": 0.0019527511226145088, "loss": 0.2108, "step": 10844 }, { "epoch": 0.01923085447512764, "grad_norm": 0.578125, "learning_rate": 0.001952732089538801, "loss": 0.3534, "step": 10846 }, { "epoch": 0.019234400640437455, "grad_norm": 0.8046875, "learning_rate": 0.0019527130527337382, "loss": 0.2177, "step": 10848 }, { "epoch": 0.01923794680574727, "grad_norm": 0.51171875, "learning_rate": 0.0019526940121994036, "loss": 0.3468, "step": 10850 }, { "epoch": 0.019241492971057084, "grad_norm": 0.376953125, "learning_rate": 0.0019526749679358813, "loss": 0.3032, "step": 10852 }, { "epoch": 0.0192450391363669, "grad_norm": 0.255859375, "learning_rate": 0.0019526559199432537, "loss": 0.212, "step": 10854 }, { "epoch": 0.019248585301676717, "grad_norm": 0.78125, "learning_rate": 0.0019526368682216044, "loss": 0.2494, "step": 10856 }, { "epoch": 0.01925213146698653, "grad_norm": 0.34375, "learning_rate": 0.0019526178127710165, "loss": 0.1968, "step": 10858 }, { "epoch": 0.019255677632296346, "grad_norm": 0.443359375, "learning_rate": 0.001952598753591574, "loss": 0.2657, "step": 10860 }, { "epoch": 0.01925922379760616, "grad_norm": 0.59375, "learning_rate": 0.0019525796906833598, "loss": 0.2805, "step": 10862 }, { "epoch": 0.019262769962915975, "grad_norm": 0.85546875, "learning_rate": 0.0019525606240464565, "loss": 0.2177, "step": 10864 }, { "epoch": 0.01926631612822579, "grad_norm": 0.26953125, "learning_rate": 0.0019525415536809493, "loss": 0.2162, "step": 10866 }, { "epoch": 0.019269862293535608, "grad_norm": 0.25, "learning_rate": 0.0019525224795869196, "loss": 0.3353, "step": 10868 }, { "epoch": 0.019273408458845422, "grad_norm": 0.400390625, "learning_rate": 0.0019525034017644522, "loss": 0.2781, "step": 10870 }, { "epoch": 0.019276954624155237, "grad_norm": 0.87890625, "learning_rate": 0.0019524843202136303, "loss": 0.2958, "step": 10872 }, { "epoch": 0.01928050078946505, "grad_norm": 0.31640625, "learning_rate": 0.0019524652349345371, "loss": 0.2147, "step": 10874 }, { "epoch": 0.019284046954774866, "grad_norm": 0.412109375, "learning_rate": 0.0019524461459272562, "loss": 0.2359, "step": 10876 }, { "epoch": 0.019287593120084684, "grad_norm": 0.81640625, "learning_rate": 0.0019524270531918707, "loss": 0.2246, "step": 10878 }, { "epoch": 0.0192911392853945, "grad_norm": 0.37109375, "learning_rate": 0.0019524079567284644, "loss": 0.2572, "step": 10880 }, { "epoch": 0.019294685450704313, "grad_norm": 0.462890625, "learning_rate": 0.001952388856537121, "loss": 0.211, "step": 10882 }, { "epoch": 0.019298231616014128, "grad_norm": 0.671875, "learning_rate": 0.001952369752617924, "loss": 0.2447, "step": 10884 }, { "epoch": 0.019301777781323942, "grad_norm": 0.54296875, "learning_rate": 0.0019523506449709567, "loss": 0.2415, "step": 10886 }, { "epoch": 0.019305323946633757, "grad_norm": 0.349609375, "learning_rate": 0.0019523315335963026, "loss": 0.2132, "step": 10888 }, { "epoch": 0.019308870111943575, "grad_norm": 0.7890625, "learning_rate": 0.0019523124184940457, "loss": 0.2173, "step": 10890 }, { "epoch": 0.01931241627725339, "grad_norm": 0.58203125, "learning_rate": 0.0019522932996642694, "loss": 0.2229, "step": 10892 }, { "epoch": 0.019315962442563204, "grad_norm": 0.83203125, "learning_rate": 0.0019522741771070573, "loss": 0.2663, "step": 10894 }, { "epoch": 0.01931950860787302, "grad_norm": 0.375, "learning_rate": 0.0019522550508224929, "loss": 0.2241, "step": 10896 }, { "epoch": 0.019323054773182833, "grad_norm": 0.298828125, "learning_rate": 0.0019522359208106598, "loss": 0.2131, "step": 10898 }, { "epoch": 0.019326600938492648, "grad_norm": 0.46484375, "learning_rate": 0.0019522167870716421, "loss": 0.2677, "step": 10900 }, { "epoch": 0.019330147103802466, "grad_norm": 2.15625, "learning_rate": 0.001952197649605523, "loss": 0.3025, "step": 10902 }, { "epoch": 0.01933369326911228, "grad_norm": 0.515625, "learning_rate": 0.0019521785084123864, "loss": 0.2222, "step": 10904 }, { "epoch": 0.019337239434422095, "grad_norm": 0.453125, "learning_rate": 0.0019521593634923163, "loss": 0.2296, "step": 10906 }, { "epoch": 0.01934078559973191, "grad_norm": 0.5390625, "learning_rate": 0.0019521402148453958, "loss": 0.2072, "step": 10908 }, { "epoch": 0.019344331765041724, "grad_norm": 0.41796875, "learning_rate": 0.0019521210624717093, "loss": 0.2531, "step": 10910 }, { "epoch": 0.019347877930351542, "grad_norm": 0.353515625, "learning_rate": 0.0019521019063713397, "loss": 0.2032, "step": 10912 }, { "epoch": 0.019351424095661356, "grad_norm": 0.314453125, "learning_rate": 0.0019520827465443718, "loss": 0.2349, "step": 10914 }, { "epoch": 0.01935497026097117, "grad_norm": 1.375, "learning_rate": 0.0019520635829908886, "loss": 0.4852, "step": 10916 }, { "epoch": 0.019358516426280985, "grad_norm": 0.302734375, "learning_rate": 0.0019520444157109745, "loss": 0.1609, "step": 10918 }, { "epoch": 0.0193620625915908, "grad_norm": 0.99609375, "learning_rate": 0.001952025244704713, "loss": 0.2318, "step": 10920 }, { "epoch": 0.019365608756900615, "grad_norm": 1.7421875, "learning_rate": 0.0019520060699721878, "loss": 0.3149, "step": 10922 }, { "epoch": 0.019369154922210433, "grad_norm": 0.625, "learning_rate": 0.001951986891513483, "loss": 0.222, "step": 10924 }, { "epoch": 0.019372701087520247, "grad_norm": 0.53515625, "learning_rate": 0.0019519677093286826, "loss": 0.2297, "step": 10926 }, { "epoch": 0.01937624725283006, "grad_norm": 0.84375, "learning_rate": 0.0019519485234178705, "loss": 0.2908, "step": 10928 }, { "epoch": 0.019379793418139876, "grad_norm": 0.9453125, "learning_rate": 0.00195192933378113, "loss": 0.2186, "step": 10930 }, { "epoch": 0.01938333958344969, "grad_norm": 0.53125, "learning_rate": 0.0019519101404185456, "loss": 0.3206, "step": 10932 }, { "epoch": 0.019386885748759505, "grad_norm": 0.8671875, "learning_rate": 0.0019518909433302012, "loss": 0.2265, "step": 10934 }, { "epoch": 0.019390431914069323, "grad_norm": 0.78125, "learning_rate": 0.0019518717425161807, "loss": 0.1825, "step": 10936 }, { "epoch": 0.019393978079379138, "grad_norm": 0.40625, "learning_rate": 0.0019518525379765676, "loss": 0.193, "step": 10938 }, { "epoch": 0.019397524244688952, "grad_norm": 0.408203125, "learning_rate": 0.0019518333297114468, "loss": 0.2487, "step": 10940 }, { "epoch": 0.019401070409998767, "grad_norm": 0.3984375, "learning_rate": 0.0019518141177209015, "loss": 0.3763, "step": 10942 }, { "epoch": 0.01940461657530858, "grad_norm": 2.046875, "learning_rate": 0.0019517949020050162, "loss": 0.2061, "step": 10944 }, { "epoch": 0.0194081627406184, "grad_norm": 0.51171875, "learning_rate": 0.0019517756825638748, "loss": 0.2514, "step": 10946 }, { "epoch": 0.019411708905928214, "grad_norm": 0.96484375, "learning_rate": 0.0019517564593975615, "loss": 0.177, "step": 10948 }, { "epoch": 0.01941525507123803, "grad_norm": 0.7109375, "learning_rate": 0.0019517372325061598, "loss": 0.2142, "step": 10950 }, { "epoch": 0.019418801236547843, "grad_norm": 0.29296875, "learning_rate": 0.0019517180018897543, "loss": 0.1947, "step": 10952 }, { "epoch": 0.019422347401857658, "grad_norm": 0.3125, "learning_rate": 0.001951698767548429, "loss": 0.2146, "step": 10954 }, { "epoch": 0.019425893567167472, "grad_norm": 0.7890625, "learning_rate": 0.0019516795294822681, "loss": 0.2465, "step": 10956 }, { "epoch": 0.01942943973247729, "grad_norm": 0.58984375, "learning_rate": 0.0019516602876913558, "loss": 0.286, "step": 10958 }, { "epoch": 0.019432985897787105, "grad_norm": 0.5546875, "learning_rate": 0.0019516410421757757, "loss": 0.2414, "step": 10960 }, { "epoch": 0.01943653206309692, "grad_norm": 0.7890625, "learning_rate": 0.0019516217929356127, "loss": 0.2892, "step": 10962 }, { "epoch": 0.019440078228406734, "grad_norm": 1.5625, "learning_rate": 0.0019516025399709507, "loss": 0.3304, "step": 10964 }, { "epoch": 0.01944362439371655, "grad_norm": 0.7578125, "learning_rate": 0.0019515832832818739, "loss": 0.2137, "step": 10966 }, { "epoch": 0.019447170559026363, "grad_norm": 0.76171875, "learning_rate": 0.001951564022868466, "loss": 0.1856, "step": 10968 }, { "epoch": 0.01945071672433618, "grad_norm": 0.349609375, "learning_rate": 0.0019515447587308123, "loss": 0.2202, "step": 10970 }, { "epoch": 0.019454262889645996, "grad_norm": 0.671875, "learning_rate": 0.001951525490868996, "loss": 0.225, "step": 10972 }, { "epoch": 0.01945780905495581, "grad_norm": 0.43359375, "learning_rate": 0.001951506219283102, "loss": 0.263, "step": 10974 }, { "epoch": 0.019461355220265625, "grad_norm": 1.2109375, "learning_rate": 0.0019514869439732146, "loss": 0.2425, "step": 10976 }, { "epoch": 0.01946490138557544, "grad_norm": 1.5625, "learning_rate": 0.001951467664939418, "loss": 0.2397, "step": 10978 }, { "epoch": 0.019468447550885257, "grad_norm": 1.71875, "learning_rate": 0.001951448382181796, "loss": 0.527, "step": 10980 }, { "epoch": 0.019471993716195072, "grad_norm": 0.357421875, "learning_rate": 0.0019514290957004334, "loss": 0.24, "step": 10982 }, { "epoch": 0.019475539881504886, "grad_norm": 0.69140625, "learning_rate": 0.0019514098054954146, "loss": 0.2465, "step": 10984 }, { "epoch": 0.0194790860468147, "grad_norm": 0.53515625, "learning_rate": 0.0019513905115668237, "loss": 0.2122, "step": 10986 }, { "epoch": 0.019482632212124516, "grad_norm": 1.4296875, "learning_rate": 0.0019513712139147456, "loss": 0.4023, "step": 10988 }, { "epoch": 0.01948617837743433, "grad_norm": 0.28125, "learning_rate": 0.0019513519125392645, "loss": 0.2558, "step": 10990 }, { "epoch": 0.019489724542744148, "grad_norm": 1.78125, "learning_rate": 0.001951332607440464, "loss": 0.3743, "step": 10992 }, { "epoch": 0.019493270708053963, "grad_norm": 0.330078125, "learning_rate": 0.0019513132986184296, "loss": 0.26, "step": 10994 }, { "epoch": 0.019496816873363777, "grad_norm": 0.88671875, "learning_rate": 0.0019512939860732452, "loss": 0.2315, "step": 10996 }, { "epoch": 0.019500363038673592, "grad_norm": 0.21484375, "learning_rate": 0.0019512746698049958, "loss": 0.2222, "step": 10998 }, { "epoch": 0.019503909203983406, "grad_norm": 0.625, "learning_rate": 0.001951255349813765, "loss": 0.2595, "step": 11000 }, { "epoch": 0.01950745536929322, "grad_norm": 0.349609375, "learning_rate": 0.001951236026099638, "loss": 0.3253, "step": 11002 }, { "epoch": 0.01951100153460304, "grad_norm": 0.39453125, "learning_rate": 0.0019512166986626989, "loss": 0.2113, "step": 11004 }, { "epoch": 0.019514547699912854, "grad_norm": 0.32421875, "learning_rate": 0.0019511973675030326, "loss": 0.221, "step": 11006 }, { "epoch": 0.019518093865222668, "grad_norm": 0.86328125, "learning_rate": 0.0019511780326207234, "loss": 0.191, "step": 11008 }, { "epoch": 0.019521640030532483, "grad_norm": 0.36328125, "learning_rate": 0.0019511586940158556, "loss": 0.2271, "step": 11010 }, { "epoch": 0.019525186195842297, "grad_norm": 0.9140625, "learning_rate": 0.0019511393516885142, "loss": 0.2208, "step": 11012 }, { "epoch": 0.019528732361152115, "grad_norm": 0.98046875, "learning_rate": 0.001951120005638784, "loss": 0.2405, "step": 11014 }, { "epoch": 0.01953227852646193, "grad_norm": 0.58203125, "learning_rate": 0.0019511006558667492, "loss": 0.2341, "step": 11016 }, { "epoch": 0.019535824691771744, "grad_norm": 1.6796875, "learning_rate": 0.0019510813023724944, "loss": 0.5106, "step": 11018 }, { "epoch": 0.01953937085708156, "grad_norm": 0.890625, "learning_rate": 0.0019510619451561042, "loss": 0.2823, "step": 11020 }, { "epoch": 0.019542917022391373, "grad_norm": 0.54296875, "learning_rate": 0.0019510425842176639, "loss": 0.2425, "step": 11022 }, { "epoch": 0.019546463187701188, "grad_norm": 0.36328125, "learning_rate": 0.001951023219557257, "loss": 0.2201, "step": 11024 }, { "epoch": 0.019550009353011006, "grad_norm": 0.734375, "learning_rate": 0.0019510038511749692, "loss": 0.281, "step": 11026 }, { "epoch": 0.01955355551832082, "grad_norm": 0.484375, "learning_rate": 0.0019509844790708848, "loss": 0.2588, "step": 11028 }, { "epoch": 0.019557101683630635, "grad_norm": 0.43359375, "learning_rate": 0.0019509651032450887, "loss": 0.2392, "step": 11030 }, { "epoch": 0.01956064784894045, "grad_norm": 0.73828125, "learning_rate": 0.0019509457236976657, "loss": 0.3737, "step": 11032 }, { "epoch": 0.019564194014250264, "grad_norm": 0.375, "learning_rate": 0.0019509263404287004, "loss": 0.3482, "step": 11034 }, { "epoch": 0.01956774017956008, "grad_norm": 0.82421875, "learning_rate": 0.0019509069534382772, "loss": 0.2544, "step": 11036 }, { "epoch": 0.019571286344869897, "grad_norm": 0.40234375, "learning_rate": 0.0019508875627264814, "loss": 0.2289, "step": 11038 }, { "epoch": 0.01957483251017971, "grad_norm": 0.302734375, "learning_rate": 0.0019508681682933978, "loss": 0.2791, "step": 11040 }, { "epoch": 0.019578378675489526, "grad_norm": 0.423828125, "learning_rate": 0.001950848770139111, "loss": 0.2202, "step": 11042 }, { "epoch": 0.01958192484079934, "grad_norm": 0.41015625, "learning_rate": 0.0019508293682637056, "loss": 0.3792, "step": 11044 }, { "epoch": 0.019585471006109155, "grad_norm": 0.3984375, "learning_rate": 0.0019508099626672673, "loss": 0.1706, "step": 11046 }, { "epoch": 0.019589017171418973, "grad_norm": 0.953125, "learning_rate": 0.0019507905533498802, "loss": 0.229, "step": 11048 }, { "epoch": 0.019592563336728788, "grad_norm": 0.3359375, "learning_rate": 0.0019507711403116293, "loss": 0.2952, "step": 11050 }, { "epoch": 0.019596109502038602, "grad_norm": 0.259765625, "learning_rate": 0.0019507517235525997, "loss": 0.4037, "step": 11052 }, { "epoch": 0.019599655667348417, "grad_norm": 0.34765625, "learning_rate": 0.0019507323030728762, "loss": 0.2403, "step": 11054 }, { "epoch": 0.01960320183265823, "grad_norm": 0.310546875, "learning_rate": 0.0019507128788725438, "loss": 0.3066, "step": 11056 }, { "epoch": 0.019606747997968046, "grad_norm": 0.4921875, "learning_rate": 0.0019506934509516875, "loss": 0.2749, "step": 11058 }, { "epoch": 0.019610294163277864, "grad_norm": 12.125, "learning_rate": 0.001950674019310392, "loss": 0.2871, "step": 11060 }, { "epoch": 0.01961384032858768, "grad_norm": 10.8125, "learning_rate": 0.0019506545839487427, "loss": 0.3001, "step": 11062 }, { "epoch": 0.019617386493897493, "grad_norm": 0.95703125, "learning_rate": 0.001950635144866824, "loss": 0.2572, "step": 11064 }, { "epoch": 0.019620932659207307, "grad_norm": 0.47265625, "learning_rate": 0.0019506157020647216, "loss": 0.2778, "step": 11066 }, { "epoch": 0.019624478824517122, "grad_norm": 0.373046875, "learning_rate": 0.0019505962555425205, "loss": 0.2602, "step": 11068 }, { "epoch": 0.019628024989826937, "grad_norm": 0.66015625, "learning_rate": 0.0019505768053003048, "loss": 0.4013, "step": 11070 }, { "epoch": 0.019631571155136755, "grad_norm": 0.328125, "learning_rate": 0.001950557351338161, "loss": 0.2023, "step": 11072 }, { "epoch": 0.01963511732044657, "grad_norm": 1.3671875, "learning_rate": 0.001950537893656173, "loss": 0.2618, "step": 11074 }, { "epoch": 0.019638663485756384, "grad_norm": 0.3046875, "learning_rate": 0.0019505184322544259, "loss": 0.2211, "step": 11076 }, { "epoch": 0.019642209651066198, "grad_norm": 0.94921875, "learning_rate": 0.0019504989671330056, "loss": 0.2719, "step": 11078 }, { "epoch": 0.019645755816376013, "grad_norm": 0.6171875, "learning_rate": 0.001950479498291997, "loss": 0.3977, "step": 11080 }, { "epoch": 0.01964930198168583, "grad_norm": 0.64453125, "learning_rate": 0.0019504600257314849, "loss": 0.2069, "step": 11082 }, { "epoch": 0.019652848146995645, "grad_norm": 0.396484375, "learning_rate": 0.0019504405494515545, "loss": 0.2656, "step": 11084 }, { "epoch": 0.01965639431230546, "grad_norm": 1.6875, "learning_rate": 0.0019504210694522913, "loss": 0.3066, "step": 11086 }, { "epoch": 0.019659940477615274, "grad_norm": 7.4375, "learning_rate": 0.0019504015857337803, "loss": 0.2919, "step": 11088 }, { "epoch": 0.01966348664292509, "grad_norm": 0.287109375, "learning_rate": 0.0019503820982961068, "loss": 0.2172, "step": 11090 }, { "epoch": 0.019667032808234904, "grad_norm": 2.203125, "learning_rate": 0.001950362607139356, "loss": 0.6718, "step": 11092 }, { "epoch": 0.01967057897354472, "grad_norm": 0.9765625, "learning_rate": 0.0019503431122636131, "loss": 0.2758, "step": 11094 }, { "epoch": 0.019674125138854536, "grad_norm": 0.671875, "learning_rate": 0.0019503236136689632, "loss": 0.2416, "step": 11096 }, { "epoch": 0.01967767130416435, "grad_norm": 0.56640625, "learning_rate": 0.0019503041113554918, "loss": 0.2775, "step": 11098 }, { "epoch": 0.019681217469474165, "grad_norm": 0.39453125, "learning_rate": 0.0019502846053232844, "loss": 0.2401, "step": 11100 }, { "epoch": 0.01968476363478398, "grad_norm": 0.490234375, "learning_rate": 0.0019502650955724255, "loss": 0.2443, "step": 11102 }, { "epoch": 0.019688309800093794, "grad_norm": 2.03125, "learning_rate": 0.0019502455821030014, "loss": 0.24, "step": 11104 }, { "epoch": 0.019691855965403612, "grad_norm": 0.4453125, "learning_rate": 0.001950226064915097, "loss": 0.2128, "step": 11106 }, { "epoch": 0.019695402130713427, "grad_norm": 4.28125, "learning_rate": 0.0019502065440087975, "loss": 0.4816, "step": 11108 }, { "epoch": 0.01969894829602324, "grad_norm": 0.54296875, "learning_rate": 0.0019501870193841884, "loss": 0.1832, "step": 11110 }, { "epoch": 0.019702494461333056, "grad_norm": 1.734375, "learning_rate": 0.0019501674910413554, "loss": 0.2731, "step": 11112 }, { "epoch": 0.01970604062664287, "grad_norm": 0.515625, "learning_rate": 0.0019501479589803831, "loss": 0.2385, "step": 11114 }, { "epoch": 0.01970958679195269, "grad_norm": 0.390625, "learning_rate": 0.001950128423201358, "loss": 0.2106, "step": 11116 }, { "epoch": 0.019713132957262503, "grad_norm": 1.453125, "learning_rate": 0.0019501088837043648, "loss": 0.3689, "step": 11118 }, { "epoch": 0.019716679122572318, "grad_norm": 0.4921875, "learning_rate": 0.0019500893404894892, "loss": 0.2387, "step": 11120 }, { "epoch": 0.019720225287882132, "grad_norm": 0.96484375, "learning_rate": 0.0019500697935568166, "loss": 0.277, "step": 11122 }, { "epoch": 0.019723771453191947, "grad_norm": 1.3359375, "learning_rate": 0.0019500502429064324, "loss": 0.34, "step": 11124 }, { "epoch": 0.01972731761850176, "grad_norm": 0.53125, "learning_rate": 0.001950030688538422, "loss": 0.2054, "step": 11126 }, { "epoch": 0.01973086378381158, "grad_norm": 1.0546875, "learning_rate": 0.0019500111304528716, "loss": 0.2229, "step": 11128 }, { "epoch": 0.019734409949121394, "grad_norm": 3.0625, "learning_rate": 0.001949991568649866, "loss": 0.2168, "step": 11130 }, { "epoch": 0.01973795611443121, "grad_norm": 0.890625, "learning_rate": 0.001949972003129491, "loss": 0.3146, "step": 11132 }, { "epoch": 0.019741502279741023, "grad_norm": 1.375, "learning_rate": 0.0019499524338918322, "loss": 0.308, "step": 11134 }, { "epoch": 0.019745048445050838, "grad_norm": 0.62109375, "learning_rate": 0.001949932860936975, "loss": 0.2132, "step": 11136 }, { "epoch": 0.019748594610360652, "grad_norm": 1.2265625, "learning_rate": 0.0019499132842650056, "loss": 0.2605, "step": 11138 }, { "epoch": 0.01975214077567047, "grad_norm": 0.3828125, "learning_rate": 0.001949893703876009, "loss": 0.2386, "step": 11140 }, { "epoch": 0.019755686940980285, "grad_norm": 0.330078125, "learning_rate": 0.001949874119770071, "loss": 0.2523, "step": 11142 }, { "epoch": 0.0197592331062901, "grad_norm": 0.30859375, "learning_rate": 0.0019498545319472772, "loss": 0.2623, "step": 11144 }, { "epoch": 0.019762779271599914, "grad_norm": 0.48046875, "learning_rate": 0.0019498349404077132, "loss": 0.2426, "step": 11146 }, { "epoch": 0.01976632543690973, "grad_norm": 0.396484375, "learning_rate": 0.001949815345151465, "loss": 0.283, "step": 11148 }, { "epoch": 0.019769871602219546, "grad_norm": 0.447265625, "learning_rate": 0.0019497957461786183, "loss": 0.2172, "step": 11150 }, { "epoch": 0.01977341776752936, "grad_norm": 0.408203125, "learning_rate": 0.001949776143489258, "loss": 0.2157, "step": 11152 }, { "epoch": 0.019776963932839176, "grad_norm": 0.29296875, "learning_rate": 0.0019497565370834712, "loss": 0.3706, "step": 11154 }, { "epoch": 0.01978051009814899, "grad_norm": 0.375, "learning_rate": 0.0019497369269613424, "loss": 0.2533, "step": 11156 }, { "epoch": 0.019784056263458805, "grad_norm": 0.984375, "learning_rate": 0.001949717313122958, "loss": 0.4503, "step": 11158 }, { "epoch": 0.01978760242876862, "grad_norm": 0.2314453125, "learning_rate": 0.0019496976955684037, "loss": 0.2002, "step": 11160 }, { "epoch": 0.019791148594078437, "grad_norm": 0.7109375, "learning_rate": 0.0019496780742977653, "loss": 0.4008, "step": 11162 }, { "epoch": 0.019794694759388252, "grad_norm": 0.81640625, "learning_rate": 0.0019496584493111282, "loss": 0.2809, "step": 11164 }, { "epoch": 0.019798240924698066, "grad_norm": 1.828125, "learning_rate": 0.001949638820608579, "loss": 0.285, "step": 11166 }, { "epoch": 0.01980178709000788, "grad_norm": 1.5390625, "learning_rate": 0.0019496191881902033, "loss": 0.3015, "step": 11168 }, { "epoch": 0.019805333255317695, "grad_norm": 0.39453125, "learning_rate": 0.0019495995520560864, "loss": 0.2502, "step": 11170 }, { "epoch": 0.01980887942062751, "grad_norm": 0.359375, "learning_rate": 0.0019495799122063143, "loss": 0.2487, "step": 11172 }, { "epoch": 0.019812425585937328, "grad_norm": 1.0078125, "learning_rate": 0.0019495602686409738, "loss": 0.326, "step": 11174 }, { "epoch": 0.019815971751247143, "grad_norm": 0.2412109375, "learning_rate": 0.0019495406213601496, "loss": 0.1826, "step": 11176 }, { "epoch": 0.019819517916556957, "grad_norm": 2.0, "learning_rate": 0.0019495209703639287, "loss": 0.2762, "step": 11178 }, { "epoch": 0.01982306408186677, "grad_norm": 1.0546875, "learning_rate": 0.0019495013156523961, "loss": 0.3166, "step": 11180 }, { "epoch": 0.019826610247176586, "grad_norm": 0.267578125, "learning_rate": 0.0019494816572256384, "loss": 0.1833, "step": 11182 }, { "epoch": 0.019830156412486404, "grad_norm": 0.57421875, "learning_rate": 0.0019494619950837413, "loss": 0.43, "step": 11184 }, { "epoch": 0.01983370257779622, "grad_norm": 1.7109375, "learning_rate": 0.001949442329226791, "loss": 0.2684, "step": 11186 }, { "epoch": 0.019837248743106033, "grad_norm": 0.7109375, "learning_rate": 0.0019494226596548734, "loss": 0.1878, "step": 11188 }, { "epoch": 0.019840794908415848, "grad_norm": 0.3359375, "learning_rate": 0.0019494029863680743, "loss": 0.2336, "step": 11190 }, { "epoch": 0.019844341073725662, "grad_norm": 2.328125, "learning_rate": 0.00194938330936648, "loss": 0.3088, "step": 11192 }, { "epoch": 0.019847887239035477, "grad_norm": 10.6875, "learning_rate": 0.0019493636286501766, "loss": 0.2701, "step": 11194 }, { "epoch": 0.019851433404345295, "grad_norm": 5.875, "learning_rate": 0.00194934394421925, "loss": 0.207, "step": 11196 }, { "epoch": 0.01985497956965511, "grad_norm": 1.0859375, "learning_rate": 0.001949324256073786, "loss": 0.2564, "step": 11198 }, { "epoch": 0.019858525734964924, "grad_norm": 0.5546875, "learning_rate": 0.0019493045642138714, "loss": 0.2302, "step": 11200 }, { "epoch": 0.01986207190027474, "grad_norm": 1.734375, "learning_rate": 0.001949284868639592, "loss": 0.2123, "step": 11202 }, { "epoch": 0.019865618065584553, "grad_norm": 0.396484375, "learning_rate": 0.0019492651693510338, "loss": 0.2276, "step": 11204 }, { "epoch": 0.019869164230894368, "grad_norm": 0.6171875, "learning_rate": 0.0019492454663482832, "loss": 0.2275, "step": 11206 }, { "epoch": 0.019872710396204186, "grad_norm": 0.33984375, "learning_rate": 0.001949225759631426, "loss": 0.2468, "step": 11208 }, { "epoch": 0.019876256561514, "grad_norm": 0.79296875, "learning_rate": 0.0019492060492005488, "loss": 0.2124, "step": 11210 }, { "epoch": 0.019879802726823815, "grad_norm": 0.63671875, "learning_rate": 0.0019491863350557378, "loss": 0.2968, "step": 11212 }, { "epoch": 0.01988334889213363, "grad_norm": 0.3125, "learning_rate": 0.0019491666171970786, "loss": 0.2171, "step": 11214 }, { "epoch": 0.019886895057443444, "grad_norm": 0.451171875, "learning_rate": 0.001949146895624658, "loss": 0.1843, "step": 11216 }, { "epoch": 0.019890441222753262, "grad_norm": 0.2890625, "learning_rate": 0.0019491271703385622, "loss": 0.2565, "step": 11218 }, { "epoch": 0.019893987388063077, "grad_norm": 0.515625, "learning_rate": 0.0019491074413388774, "loss": 0.2432, "step": 11220 }, { "epoch": 0.01989753355337289, "grad_norm": 0.89453125, "learning_rate": 0.0019490877086256898, "loss": 0.2286, "step": 11222 }, { "epoch": 0.019901079718682706, "grad_norm": 0.6640625, "learning_rate": 0.0019490679721990858, "loss": 0.2235, "step": 11224 }, { "epoch": 0.01990462588399252, "grad_norm": 0.267578125, "learning_rate": 0.0019490482320591515, "loss": 0.1426, "step": 11226 }, { "epoch": 0.019908172049302335, "grad_norm": 0.482421875, "learning_rate": 0.001949028488205974, "loss": 0.2499, "step": 11228 }, { "epoch": 0.019911718214612153, "grad_norm": 0.47265625, "learning_rate": 0.0019490087406396387, "loss": 0.256, "step": 11230 }, { "epoch": 0.019915264379921967, "grad_norm": 0.8125, "learning_rate": 0.0019489889893602322, "loss": 0.2721, "step": 11232 }, { "epoch": 0.019918810545231782, "grad_norm": 0.54296875, "learning_rate": 0.001948969234367841, "loss": 0.2636, "step": 11234 }, { "epoch": 0.019922356710541596, "grad_norm": 0.345703125, "learning_rate": 0.0019489494756625516, "loss": 0.2813, "step": 11236 }, { "epoch": 0.01992590287585141, "grad_norm": 0.494140625, "learning_rate": 0.0019489297132444505, "loss": 0.3022, "step": 11238 }, { "epoch": 0.019929449041161226, "grad_norm": 0.2265625, "learning_rate": 0.0019489099471136238, "loss": 0.4236, "step": 11240 }, { "epoch": 0.019932995206471044, "grad_norm": 2.25, "learning_rate": 0.0019488901772701582, "loss": 0.265, "step": 11242 }, { "epoch": 0.019936541371780858, "grad_norm": 0.337890625, "learning_rate": 0.0019488704037141397, "loss": 0.2515, "step": 11244 }, { "epoch": 0.019940087537090673, "grad_norm": 0.21484375, "learning_rate": 0.0019488506264456556, "loss": 0.4747, "step": 11246 }, { "epoch": 0.019943633702400487, "grad_norm": 0.451171875, "learning_rate": 0.0019488308454647916, "loss": 0.3037, "step": 11248 }, { "epoch": 0.019947179867710302, "grad_norm": 1.0625, "learning_rate": 0.0019488110607716346, "loss": 0.2295, "step": 11250 }, { "epoch": 0.01995072603302012, "grad_norm": 1.7890625, "learning_rate": 0.0019487912723662715, "loss": 0.2905, "step": 11252 }, { "epoch": 0.019954272198329934, "grad_norm": 0.28515625, "learning_rate": 0.001948771480248788, "loss": 0.2631, "step": 11254 }, { "epoch": 0.01995781836363975, "grad_norm": 0.462890625, "learning_rate": 0.001948751684419271, "loss": 0.2534, "step": 11256 }, { "epoch": 0.019961364528949564, "grad_norm": 0.482421875, "learning_rate": 0.0019487318848778073, "loss": 0.1893, "step": 11258 }, { "epoch": 0.019964910694259378, "grad_norm": 4.65625, "learning_rate": 0.0019487120816244834, "loss": 0.4082, "step": 11260 }, { "epoch": 0.019968456859569193, "grad_norm": 0.455078125, "learning_rate": 0.0019486922746593856, "loss": 0.216, "step": 11262 }, { "epoch": 0.01997200302487901, "grad_norm": 0.4609375, "learning_rate": 0.001948672463982601, "loss": 0.2413, "step": 11264 }, { "epoch": 0.019975549190188825, "grad_norm": 0.328125, "learning_rate": 0.0019486526495942158, "loss": 0.3381, "step": 11266 }, { "epoch": 0.01997909535549864, "grad_norm": 0.73828125, "learning_rate": 0.001948632831494317, "loss": 0.2167, "step": 11268 }, { "epoch": 0.019982641520808454, "grad_norm": 0.228515625, "learning_rate": 0.001948613009682991, "loss": 0.221, "step": 11270 }, { "epoch": 0.01998618768611827, "grad_norm": 0.498046875, "learning_rate": 0.001948593184160325, "loss": 0.2228, "step": 11272 }, { "epoch": 0.019989733851428083, "grad_norm": 0.451171875, "learning_rate": 0.001948573354926405, "loss": 0.3056, "step": 11274 }, { "epoch": 0.0199932800167379, "grad_norm": 0.3671875, "learning_rate": 0.0019485535219813182, "loss": 0.2726, "step": 11276 }, { "epoch": 0.019996826182047716, "grad_norm": 0.1884765625, "learning_rate": 0.0019485336853251511, "loss": 0.2645, "step": 11278 }, { "epoch": 0.02000037234735753, "grad_norm": 0.3125, "learning_rate": 0.001948513844957991, "loss": 0.2255, "step": 11280 }, { "epoch": 0.020003918512667345, "grad_norm": 1.765625, "learning_rate": 0.0019484940008799236, "loss": 0.2621, "step": 11282 }, { "epoch": 0.02000746467797716, "grad_norm": 0.380859375, "learning_rate": 0.0019484741530910363, "loss": 0.1661, "step": 11284 }, { "epoch": 0.020011010843286978, "grad_norm": 0.291015625, "learning_rate": 0.0019484543015914162, "loss": 0.3313, "step": 11286 }, { "epoch": 0.020014557008596792, "grad_norm": 0.490234375, "learning_rate": 0.0019484344463811501, "loss": 0.1959, "step": 11288 }, { "epoch": 0.020018103173906607, "grad_norm": 0.859375, "learning_rate": 0.001948414587460324, "loss": 0.2805, "step": 11290 }, { "epoch": 0.02002164933921642, "grad_norm": 0.52734375, "learning_rate": 0.0019483947248290256, "loss": 0.2232, "step": 11292 }, { "epoch": 0.020025195504526236, "grad_norm": 1.921875, "learning_rate": 0.0019483748584873412, "loss": 0.2612, "step": 11294 }, { "epoch": 0.02002874166983605, "grad_norm": 0.8046875, "learning_rate": 0.001948354988435358, "loss": 0.5627, "step": 11296 }, { "epoch": 0.02003228783514587, "grad_norm": 0.25, "learning_rate": 0.0019483351146731634, "loss": 0.215, "step": 11298 }, { "epoch": 0.020035834000455683, "grad_norm": 0.546875, "learning_rate": 0.0019483152372008433, "loss": 0.2519, "step": 11300 }, { "epoch": 0.020039380165765498, "grad_norm": 0.2578125, "learning_rate": 0.0019482953560184854, "loss": 0.2748, "step": 11302 }, { "epoch": 0.020042926331075312, "grad_norm": 0.8515625, "learning_rate": 0.001948275471126176, "loss": 0.2328, "step": 11304 }, { "epoch": 0.020046472496385127, "grad_norm": 0.26171875, "learning_rate": 0.0019482555825240026, "loss": 0.2138, "step": 11306 }, { "epoch": 0.02005001866169494, "grad_norm": 0.2578125, "learning_rate": 0.0019482356902120522, "loss": 0.2024, "step": 11308 }, { "epoch": 0.02005356482700476, "grad_norm": 0.49609375, "learning_rate": 0.0019482157941904112, "loss": 0.235, "step": 11310 }, { "epoch": 0.020057110992314574, "grad_norm": 1.9453125, "learning_rate": 0.0019481958944591675, "loss": 0.4309, "step": 11312 }, { "epoch": 0.02006065715762439, "grad_norm": 0.50390625, "learning_rate": 0.0019481759910184072, "loss": 0.2425, "step": 11314 }, { "epoch": 0.020064203322934203, "grad_norm": 0.33984375, "learning_rate": 0.0019481560838682183, "loss": 0.2217, "step": 11316 }, { "epoch": 0.020067749488244017, "grad_norm": 0.357421875, "learning_rate": 0.001948136173008687, "loss": 0.2315, "step": 11318 }, { "epoch": 0.020071295653553835, "grad_norm": 0.46484375, "learning_rate": 0.0019481162584399005, "loss": 0.2198, "step": 11320 }, { "epoch": 0.02007484181886365, "grad_norm": 0.80078125, "learning_rate": 0.0019480963401619464, "loss": 0.3774, "step": 11322 }, { "epoch": 0.020078387984173465, "grad_norm": 1.15625, "learning_rate": 0.0019480764181749118, "loss": 0.1891, "step": 11324 }, { "epoch": 0.02008193414948328, "grad_norm": 0.345703125, "learning_rate": 0.001948056492478883, "loss": 0.1933, "step": 11326 }, { "epoch": 0.020085480314793094, "grad_norm": 1.0859375, "learning_rate": 0.0019480365630739484, "loss": 0.42, "step": 11328 }, { "epoch": 0.020089026480102908, "grad_norm": 0.828125, "learning_rate": 0.001948016629960194, "loss": 0.1988, "step": 11330 }, { "epoch": 0.020092572645412726, "grad_norm": 1.0234375, "learning_rate": 0.0019479966931377076, "loss": 0.408, "step": 11332 }, { "epoch": 0.02009611881072254, "grad_norm": 0.5703125, "learning_rate": 0.001947976752606576, "loss": 0.2296, "step": 11334 }, { "epoch": 0.020099664976032355, "grad_norm": 0.25, "learning_rate": 0.0019479568083668871, "loss": 0.2747, "step": 11336 }, { "epoch": 0.02010321114134217, "grad_norm": 0.31640625, "learning_rate": 0.0019479368604187273, "loss": 0.2475, "step": 11338 }, { "epoch": 0.020106757306651984, "grad_norm": 2.40625, "learning_rate": 0.0019479169087621843, "loss": 0.3176, "step": 11340 }, { "epoch": 0.0201103034719618, "grad_norm": 0.49609375, "learning_rate": 0.0019478969533973452, "loss": 0.2713, "step": 11342 }, { "epoch": 0.020113849637271617, "grad_norm": 0.326171875, "learning_rate": 0.0019478769943242975, "loss": 0.2049, "step": 11344 }, { "epoch": 0.02011739580258143, "grad_norm": 0.3125, "learning_rate": 0.0019478570315431282, "loss": 0.217, "step": 11346 }, { "epoch": 0.020120941967891246, "grad_norm": 2.140625, "learning_rate": 0.0019478370650539247, "loss": 0.2857, "step": 11348 }, { "epoch": 0.02012448813320106, "grad_norm": 2.265625, "learning_rate": 0.001947817094856775, "loss": 0.3395, "step": 11350 }, { "epoch": 0.020128034298510875, "grad_norm": 0.484375, "learning_rate": 0.001947797120951765, "loss": 0.3197, "step": 11352 }, { "epoch": 0.020131580463820693, "grad_norm": 0.91015625, "learning_rate": 0.001947777143338983, "loss": 0.1779, "step": 11354 }, { "epoch": 0.020135126629130508, "grad_norm": 0.83203125, "learning_rate": 0.0019477571620185165, "loss": 0.201, "step": 11356 }, { "epoch": 0.020138672794440322, "grad_norm": 0.35546875, "learning_rate": 0.0019477371769904522, "loss": 0.2357, "step": 11358 }, { "epoch": 0.020142218959750137, "grad_norm": 1.0625, "learning_rate": 0.0019477171882548781, "loss": 0.5784, "step": 11360 }, { "epoch": 0.02014576512505995, "grad_norm": 1.5859375, "learning_rate": 0.0019476971958118817, "loss": 0.2947, "step": 11362 }, { "epoch": 0.020149311290369766, "grad_norm": 0.61328125, "learning_rate": 0.00194767719966155, "loss": 0.2103, "step": 11364 }, { "epoch": 0.020152857455679584, "grad_norm": 0.9296875, "learning_rate": 0.0019476571998039707, "loss": 0.1959, "step": 11366 }, { "epoch": 0.0201564036209894, "grad_norm": 0.5078125, "learning_rate": 0.0019476371962392307, "loss": 0.1873, "step": 11368 }, { "epoch": 0.020159949786299213, "grad_norm": 0.34765625, "learning_rate": 0.0019476171889674185, "loss": 0.2362, "step": 11370 }, { "epoch": 0.020163495951609028, "grad_norm": 0.57421875, "learning_rate": 0.0019475971779886207, "loss": 0.2522, "step": 11372 }, { "epoch": 0.020167042116918842, "grad_norm": 0.57421875, "learning_rate": 0.0019475771633029255, "loss": 0.2236, "step": 11374 }, { "epoch": 0.020170588282228657, "grad_norm": 0.5703125, "learning_rate": 0.0019475571449104202, "loss": 0.2101, "step": 11376 }, { "epoch": 0.020174134447538475, "grad_norm": 0.7109375, "learning_rate": 0.001947537122811192, "loss": 0.2019, "step": 11378 }, { "epoch": 0.02017768061284829, "grad_norm": 0.328125, "learning_rate": 0.0019475170970053289, "loss": 0.2312, "step": 11380 }, { "epoch": 0.020181226778158104, "grad_norm": 1.6875, "learning_rate": 0.0019474970674929182, "loss": 0.2084, "step": 11382 }, { "epoch": 0.02018477294346792, "grad_norm": 0.62890625, "learning_rate": 0.0019474770342740478, "loss": 0.2404, "step": 11384 }, { "epoch": 0.020188319108777733, "grad_norm": 0.546875, "learning_rate": 0.0019474569973488049, "loss": 0.184, "step": 11386 }, { "epoch": 0.02019186527408755, "grad_norm": 0.54296875, "learning_rate": 0.001947436956717277, "loss": 0.4145, "step": 11388 }, { "epoch": 0.020195411439397366, "grad_norm": 0.451171875, "learning_rate": 0.001947416912379553, "loss": 0.2548, "step": 11390 }, { "epoch": 0.02019895760470718, "grad_norm": 0.78125, "learning_rate": 0.001947396864335719, "loss": 0.2814, "step": 11392 }, { "epoch": 0.020202503770016995, "grad_norm": 0.59765625, "learning_rate": 0.0019473768125858632, "loss": 0.3115, "step": 11394 }, { "epoch": 0.02020604993532681, "grad_norm": 0.83984375, "learning_rate": 0.0019473567571300738, "loss": 0.2221, "step": 11396 }, { "epoch": 0.020209596100636624, "grad_norm": 0.408203125, "learning_rate": 0.001947336697968438, "loss": 0.325, "step": 11398 }, { "epoch": 0.020213142265946442, "grad_norm": 0.55859375, "learning_rate": 0.0019473166351010442, "loss": 0.1654, "step": 11400 }, { "epoch": 0.020216688431256256, "grad_norm": 0.5546875, "learning_rate": 0.001947296568527979, "loss": 0.2179, "step": 11402 }, { "epoch": 0.02022023459656607, "grad_norm": 0.26171875, "learning_rate": 0.0019472764982493309, "loss": 0.2067, "step": 11404 }, { "epoch": 0.020223780761875886, "grad_norm": 0.63671875, "learning_rate": 0.0019472564242651877, "loss": 0.2715, "step": 11406 }, { "epoch": 0.0202273269271857, "grad_norm": 0.421875, "learning_rate": 0.001947236346575637, "loss": 0.177, "step": 11408 }, { "epoch": 0.020230873092495515, "grad_norm": 1.375, "learning_rate": 0.0019472162651807668, "loss": 0.298, "step": 11410 }, { "epoch": 0.020234419257805333, "grad_norm": 0.208984375, "learning_rate": 0.0019471961800806646, "loss": 0.2611, "step": 11412 }, { "epoch": 0.020237965423115147, "grad_norm": 0.65625, "learning_rate": 0.0019471760912754185, "loss": 0.1969, "step": 11414 }, { "epoch": 0.020241511588424962, "grad_norm": 0.671875, "learning_rate": 0.001947155998765116, "loss": 0.2581, "step": 11416 }, { "epoch": 0.020245057753734776, "grad_norm": 0.33203125, "learning_rate": 0.0019471359025498454, "loss": 0.218, "step": 11418 }, { "epoch": 0.02024860391904459, "grad_norm": 3.015625, "learning_rate": 0.0019471158026296946, "loss": 0.4829, "step": 11420 }, { "epoch": 0.02025215008435441, "grad_norm": 0.482421875, "learning_rate": 0.0019470956990047512, "loss": 0.1781, "step": 11422 }, { "epoch": 0.020255696249664223, "grad_norm": 2.5, "learning_rate": 0.0019470755916751034, "loss": 0.3491, "step": 11424 }, { "epoch": 0.020259242414974038, "grad_norm": 0.578125, "learning_rate": 0.001947055480640839, "loss": 0.7109, "step": 11426 }, { "epoch": 0.020262788580283853, "grad_norm": 0.2470703125, "learning_rate": 0.0019470353659020456, "loss": 0.2406, "step": 11428 }, { "epoch": 0.020266334745593667, "grad_norm": 0.8515625, "learning_rate": 0.0019470152474588118, "loss": 0.2212, "step": 11430 }, { "epoch": 0.02026988091090348, "grad_norm": 0.474609375, "learning_rate": 0.0019469951253112253, "loss": 0.2106, "step": 11432 }, { "epoch": 0.0202734270762133, "grad_norm": 4.71875, "learning_rate": 0.001946974999459374, "loss": 0.3944, "step": 11434 }, { "epoch": 0.020276973241523114, "grad_norm": 0.275390625, "learning_rate": 0.0019469548699033463, "loss": 0.1671, "step": 11436 }, { "epoch": 0.02028051940683293, "grad_norm": 0.390625, "learning_rate": 0.0019469347366432297, "loss": 0.2384, "step": 11438 }, { "epoch": 0.020284065572142743, "grad_norm": 0.3203125, "learning_rate": 0.0019469145996791127, "loss": 0.2598, "step": 11440 }, { "epoch": 0.020287611737452558, "grad_norm": 0.76171875, "learning_rate": 0.001946894459011083, "loss": 0.436, "step": 11442 }, { "epoch": 0.020291157902762372, "grad_norm": 0.31640625, "learning_rate": 0.001946874314639229, "loss": 0.2505, "step": 11444 }, { "epoch": 0.02029470406807219, "grad_norm": 0.353515625, "learning_rate": 0.0019468541665636388, "loss": 0.2663, "step": 11446 }, { "epoch": 0.020298250233382005, "grad_norm": 0.68359375, "learning_rate": 0.0019468340147844004, "loss": 0.1851, "step": 11448 }, { "epoch": 0.02030179639869182, "grad_norm": 0.61328125, "learning_rate": 0.0019468138593016016, "loss": 0.2056, "step": 11450 }, { "epoch": 0.020305342564001634, "grad_norm": 0.65625, "learning_rate": 0.001946793700115331, "loss": 0.2989, "step": 11452 }, { "epoch": 0.02030888872931145, "grad_norm": 0.74609375, "learning_rate": 0.0019467735372256764, "loss": 0.3187, "step": 11454 }, { "epoch": 0.020312434894621267, "grad_norm": 0.3515625, "learning_rate": 0.0019467533706327268, "loss": 0.2322, "step": 11456 }, { "epoch": 0.02031598105993108, "grad_norm": 20.875, "learning_rate": 0.0019467332003365694, "loss": 0.256, "step": 11458 }, { "epoch": 0.020319527225240896, "grad_norm": 0.65234375, "learning_rate": 0.001946713026337293, "loss": 0.2115, "step": 11460 }, { "epoch": 0.02032307339055071, "grad_norm": 0.44140625, "learning_rate": 0.0019466928486349855, "loss": 0.2224, "step": 11462 }, { "epoch": 0.020326619555860525, "grad_norm": 1.03125, "learning_rate": 0.0019466726672297354, "loss": 0.2574, "step": 11464 }, { "epoch": 0.02033016572117034, "grad_norm": 1.1484375, "learning_rate": 0.001946652482121631, "loss": 0.2771, "step": 11466 }, { "epoch": 0.020333711886480157, "grad_norm": 0.5546875, "learning_rate": 0.0019466322933107602, "loss": 0.2037, "step": 11468 }, { "epoch": 0.020337258051789972, "grad_norm": 0.388671875, "learning_rate": 0.0019466121007972112, "loss": 0.2151, "step": 11470 }, { "epoch": 0.020340804217099787, "grad_norm": 0.3359375, "learning_rate": 0.0019465919045810734, "loss": 0.1725, "step": 11472 }, { "epoch": 0.0203443503824096, "grad_norm": 0.32421875, "learning_rate": 0.001946571704662434, "loss": 0.374, "step": 11474 }, { "epoch": 0.020347896547719416, "grad_norm": 0.423828125, "learning_rate": 0.0019465515010413817, "loss": 0.157, "step": 11476 }, { "epoch": 0.02035144271302923, "grad_norm": 0.84765625, "learning_rate": 0.0019465312937180048, "loss": 0.4223, "step": 11478 }, { "epoch": 0.02035498887833905, "grad_norm": 0.2275390625, "learning_rate": 0.001946511082692392, "loss": 0.1701, "step": 11480 }, { "epoch": 0.020358535043648863, "grad_norm": 1.4296875, "learning_rate": 0.0019464908679646309, "loss": 0.2756, "step": 11482 }, { "epoch": 0.020362081208958677, "grad_norm": 0.85546875, "learning_rate": 0.001946470649534811, "loss": 0.274, "step": 11484 }, { "epoch": 0.020365627374268492, "grad_norm": 0.4921875, "learning_rate": 0.0019464504274030198, "loss": 0.2443, "step": 11486 }, { "epoch": 0.020369173539578306, "grad_norm": 0.57421875, "learning_rate": 0.0019464302015693464, "loss": 0.1874, "step": 11488 }, { "epoch": 0.020372719704888125, "grad_norm": 0.33203125, "learning_rate": 0.0019464099720338788, "loss": 0.1898, "step": 11490 }, { "epoch": 0.02037626587019794, "grad_norm": 0.279296875, "learning_rate": 0.0019463897387967059, "loss": 0.2622, "step": 11492 }, { "epoch": 0.020379812035507754, "grad_norm": 0.455078125, "learning_rate": 0.0019463695018579158, "loss": 0.1773, "step": 11494 }, { "epoch": 0.020383358200817568, "grad_norm": 0.265625, "learning_rate": 0.0019463492612175968, "loss": 0.2106, "step": 11496 }, { "epoch": 0.020386904366127383, "grad_norm": 1.4453125, "learning_rate": 0.001946329016875838, "loss": 0.2889, "step": 11498 }, { "epoch": 0.020390450531437197, "grad_norm": 0.44140625, "learning_rate": 0.0019463087688327276, "loss": 0.2176, "step": 11500 }, { "epoch": 0.020393996696747015, "grad_norm": 0.78515625, "learning_rate": 0.0019462885170883544, "loss": 0.182, "step": 11502 }, { "epoch": 0.02039754286205683, "grad_norm": 0.3046875, "learning_rate": 0.0019462682616428067, "loss": 0.19, "step": 11504 }, { "epoch": 0.020401089027366644, "grad_norm": 1.234375, "learning_rate": 0.0019462480024961732, "loss": 0.2702, "step": 11506 }, { "epoch": 0.02040463519267646, "grad_norm": 0.88671875, "learning_rate": 0.0019462277396485423, "loss": 0.2376, "step": 11508 }, { "epoch": 0.020408181357986274, "grad_norm": 1.21875, "learning_rate": 0.0019462074731000033, "loss": 0.2597, "step": 11510 }, { "epoch": 0.020411727523296088, "grad_norm": 0.5, "learning_rate": 0.0019461872028506439, "loss": 0.1977, "step": 11512 }, { "epoch": 0.020415273688605906, "grad_norm": 0.58203125, "learning_rate": 0.0019461669289005535, "loss": 0.1813, "step": 11514 }, { "epoch": 0.02041881985391572, "grad_norm": 0.43359375, "learning_rate": 0.0019461466512498203, "loss": 0.2265, "step": 11516 }, { "epoch": 0.020422366019225535, "grad_norm": 4.8125, "learning_rate": 0.0019461263698985333, "loss": 0.2465, "step": 11518 }, { "epoch": 0.02042591218453535, "grad_norm": 6.5, "learning_rate": 0.0019461060848467806, "loss": 0.2049, "step": 11520 }, { "epoch": 0.020429458349845164, "grad_norm": 0.87890625, "learning_rate": 0.001946085796094652, "loss": 0.2118, "step": 11522 }, { "epoch": 0.020433004515154982, "grad_norm": 3.5625, "learning_rate": 0.0019460655036422352, "loss": 0.2437, "step": 11524 }, { "epoch": 0.020436550680464797, "grad_norm": 0.423828125, "learning_rate": 0.0019460452074896194, "loss": 0.1848, "step": 11526 }, { "epoch": 0.02044009684577461, "grad_norm": 0.6328125, "learning_rate": 0.0019460249076368934, "loss": 0.2134, "step": 11528 }, { "epoch": 0.020443643011084426, "grad_norm": 1.28125, "learning_rate": 0.0019460046040841459, "loss": 0.3412, "step": 11530 }, { "epoch": 0.02044718917639424, "grad_norm": 1.0390625, "learning_rate": 0.0019459842968314654, "loss": 0.2589, "step": 11532 }, { "epoch": 0.020450735341704055, "grad_norm": 1.1171875, "learning_rate": 0.0019459639858789414, "loss": 0.3058, "step": 11534 }, { "epoch": 0.020454281507013873, "grad_norm": 1.328125, "learning_rate": 0.0019459436712266618, "loss": 0.2385, "step": 11536 }, { "epoch": 0.020457827672323688, "grad_norm": 1.0625, "learning_rate": 0.0019459233528747164, "loss": 0.322, "step": 11538 }, { "epoch": 0.020461373837633502, "grad_norm": 0.55078125, "learning_rate": 0.0019459030308231935, "loss": 0.4301, "step": 11540 }, { "epoch": 0.020464920002943317, "grad_norm": 2.203125, "learning_rate": 0.0019458827050721824, "loss": 0.1894, "step": 11542 }, { "epoch": 0.02046846616825313, "grad_norm": 0.57421875, "learning_rate": 0.0019458623756217713, "loss": 0.2129, "step": 11544 }, { "epoch": 0.020472012333562946, "grad_norm": 1.203125, "learning_rate": 0.0019458420424720492, "loss": 0.2497, "step": 11546 }, { "epoch": 0.020475558498872764, "grad_norm": 0.33984375, "learning_rate": 0.001945821705623106, "loss": 0.2217, "step": 11548 }, { "epoch": 0.02047910466418258, "grad_norm": 0.5390625, "learning_rate": 0.0019458013650750297, "loss": 0.2514, "step": 11550 }, { "epoch": 0.020482650829492393, "grad_norm": 3.578125, "learning_rate": 0.0019457810208279097, "loss": 0.3662, "step": 11552 }, { "epoch": 0.020486196994802208, "grad_norm": 0.271484375, "learning_rate": 0.0019457606728818342, "loss": 0.2547, "step": 11554 }, { "epoch": 0.020489743160112022, "grad_norm": 0.416015625, "learning_rate": 0.0019457403212368935, "loss": 0.1947, "step": 11556 }, { "epoch": 0.02049328932542184, "grad_norm": 0.30078125, "learning_rate": 0.0019457199658931756, "loss": 0.232, "step": 11558 }, { "epoch": 0.020496835490731655, "grad_norm": 1.0859375, "learning_rate": 0.0019456996068507697, "loss": 0.2397, "step": 11560 }, { "epoch": 0.02050038165604147, "grad_norm": 0.6171875, "learning_rate": 0.001945679244109765, "loss": 0.2404, "step": 11562 }, { "epoch": 0.020503927821351284, "grad_norm": 0.41796875, "learning_rate": 0.0019456588776702508, "loss": 0.1804, "step": 11564 }, { "epoch": 0.0205074739866611, "grad_norm": 0.77734375, "learning_rate": 0.0019456385075323158, "loss": 0.2784, "step": 11566 }, { "epoch": 0.020511020151970913, "grad_norm": 0.9609375, "learning_rate": 0.0019456181336960491, "loss": 0.2522, "step": 11568 }, { "epoch": 0.02051456631728073, "grad_norm": 0.42578125, "learning_rate": 0.0019455977561615397, "loss": 0.2078, "step": 11570 }, { "epoch": 0.020518112482590545, "grad_norm": 0.5234375, "learning_rate": 0.0019455773749288772, "loss": 0.1963, "step": 11572 }, { "epoch": 0.02052165864790036, "grad_norm": 0.59765625, "learning_rate": 0.0019455569899981503, "loss": 0.2886, "step": 11574 }, { "epoch": 0.020525204813210175, "grad_norm": 0.55859375, "learning_rate": 0.0019455366013694483, "loss": 0.2404, "step": 11576 }, { "epoch": 0.02052875097851999, "grad_norm": 0.306640625, "learning_rate": 0.0019455162090428603, "loss": 0.3266, "step": 11578 }, { "epoch": 0.020532297143829804, "grad_norm": 1.25, "learning_rate": 0.0019454958130184755, "loss": 0.2087, "step": 11580 }, { "epoch": 0.02053584330913962, "grad_norm": 0.419921875, "learning_rate": 0.0019454754132963831, "loss": 0.2971, "step": 11582 }, { "epoch": 0.020539389474449436, "grad_norm": 0.61328125, "learning_rate": 0.0019454550098766726, "loss": 0.2213, "step": 11584 }, { "epoch": 0.02054293563975925, "grad_norm": 1.375, "learning_rate": 0.0019454346027594327, "loss": 0.2925, "step": 11586 }, { "epoch": 0.020546481805069065, "grad_norm": 1.453125, "learning_rate": 0.001945414191944753, "loss": 0.2987, "step": 11588 }, { "epoch": 0.02055002797037888, "grad_norm": 0.306640625, "learning_rate": 0.001945393777432723, "loss": 0.2941, "step": 11590 }, { "epoch": 0.020553574135688698, "grad_norm": 0.62109375, "learning_rate": 0.0019453733592234312, "loss": 0.2371, "step": 11592 }, { "epoch": 0.020557120300998512, "grad_norm": 0.796875, "learning_rate": 0.0019453529373169678, "loss": 0.3297, "step": 11594 }, { "epoch": 0.020560666466308327, "grad_norm": 0.60546875, "learning_rate": 0.0019453325117134216, "loss": 0.2031, "step": 11596 }, { "epoch": 0.02056421263161814, "grad_norm": 0.55078125, "learning_rate": 0.0019453120824128817, "loss": 0.281, "step": 11598 }, { "epoch": 0.020567758796927956, "grad_norm": 0.291015625, "learning_rate": 0.001945291649415438, "loss": 0.2049, "step": 11600 }, { "epoch": 0.02057130496223777, "grad_norm": 0.875, "learning_rate": 0.0019452712127211796, "loss": 0.3167, "step": 11602 }, { "epoch": 0.02057485112754759, "grad_norm": 0.451171875, "learning_rate": 0.001945250772330196, "loss": 0.2726, "step": 11604 }, { "epoch": 0.020578397292857403, "grad_norm": 0.24609375, "learning_rate": 0.0019452303282425766, "loss": 0.1802, "step": 11606 }, { "epoch": 0.020581943458167218, "grad_norm": 1.375, "learning_rate": 0.0019452098804584104, "loss": 0.2708, "step": 11608 }, { "epoch": 0.020585489623477032, "grad_norm": 0.625, "learning_rate": 0.0019451894289777873, "loss": 0.2571, "step": 11610 }, { "epoch": 0.020589035788786847, "grad_norm": 0.6484375, "learning_rate": 0.0019451689738007965, "loss": 0.2276, "step": 11612 }, { "epoch": 0.02059258195409666, "grad_norm": 0.44921875, "learning_rate": 0.0019451485149275278, "loss": 0.2533, "step": 11614 }, { "epoch": 0.02059612811940648, "grad_norm": 0.73828125, "learning_rate": 0.00194512805235807, "loss": 0.2683, "step": 11616 }, { "epoch": 0.020599674284716294, "grad_norm": 0.462890625, "learning_rate": 0.0019451075860925135, "loss": 0.3028, "step": 11618 }, { "epoch": 0.02060322045002611, "grad_norm": 0.65625, "learning_rate": 0.001945087116130947, "loss": 0.2313, "step": 11620 }, { "epoch": 0.020606766615335923, "grad_norm": 0.6953125, "learning_rate": 0.0019450666424734601, "loss": 0.2631, "step": 11622 }, { "epoch": 0.020610312780645738, "grad_norm": 0.38671875, "learning_rate": 0.001945046165120143, "loss": 0.2482, "step": 11624 }, { "epoch": 0.020613858945955556, "grad_norm": 0.85546875, "learning_rate": 0.0019450256840710847, "loss": 0.2074, "step": 11626 }, { "epoch": 0.02061740511126537, "grad_norm": 0.25390625, "learning_rate": 0.001945005199326375, "loss": 0.204, "step": 11628 }, { "epoch": 0.020620951276575185, "grad_norm": 0.380859375, "learning_rate": 0.0019449847108861033, "loss": 0.2374, "step": 11630 }, { "epoch": 0.020624497441885, "grad_norm": 0.2353515625, "learning_rate": 0.0019449642187503594, "loss": 0.22, "step": 11632 }, { "epoch": 0.020628043607194814, "grad_norm": 0.4375, "learning_rate": 0.001944943722919233, "loss": 0.2157, "step": 11634 }, { "epoch": 0.02063158977250463, "grad_norm": 0.76953125, "learning_rate": 0.0019449232233928128, "loss": 0.2011, "step": 11636 }, { "epoch": 0.020635135937814447, "grad_norm": 0.466796875, "learning_rate": 0.00194490272017119, "loss": 0.209, "step": 11638 }, { "epoch": 0.02063868210312426, "grad_norm": 1.3828125, "learning_rate": 0.0019448822132544531, "loss": 0.4375, "step": 11640 }, { "epoch": 0.020642228268434076, "grad_norm": 0.3671875, "learning_rate": 0.0019448617026426923, "loss": 0.175, "step": 11642 }, { "epoch": 0.02064577443374389, "grad_norm": 3.53125, "learning_rate": 0.0019448411883359969, "loss": 0.6556, "step": 11644 }, { "epoch": 0.020649320599053705, "grad_norm": 0.4453125, "learning_rate": 0.001944820670334457, "loss": 0.2209, "step": 11646 }, { "epoch": 0.02065286676436352, "grad_norm": 1.328125, "learning_rate": 0.0019448001486381625, "loss": 0.2888, "step": 11648 }, { "epoch": 0.020656412929673337, "grad_norm": 1.1171875, "learning_rate": 0.0019447796232472025, "loss": 0.3793, "step": 11650 }, { "epoch": 0.020659959094983152, "grad_norm": 0.34375, "learning_rate": 0.0019447590941616675, "loss": 0.2187, "step": 11652 }, { "epoch": 0.020663505260292966, "grad_norm": 0.4375, "learning_rate": 0.0019447385613816466, "loss": 0.1953, "step": 11654 }, { "epoch": 0.02066705142560278, "grad_norm": 0.322265625, "learning_rate": 0.0019447180249072304, "loss": 0.2351, "step": 11656 }, { "epoch": 0.020670597590912596, "grad_norm": 0.59765625, "learning_rate": 0.0019446974847385076, "loss": 0.2192, "step": 11658 }, { "epoch": 0.020674143756222414, "grad_norm": 0.72265625, "learning_rate": 0.0019446769408755689, "loss": 0.2595, "step": 11660 }, { "epoch": 0.020677689921532228, "grad_norm": 0.35546875, "learning_rate": 0.0019446563933185042, "loss": 0.2263, "step": 11662 }, { "epoch": 0.020681236086842043, "grad_norm": 0.3984375, "learning_rate": 0.0019446358420674027, "loss": 0.2359, "step": 11664 }, { "epoch": 0.020684782252151857, "grad_norm": 0.9453125, "learning_rate": 0.0019446152871223548, "loss": 0.25, "step": 11666 }, { "epoch": 0.020688328417461672, "grad_norm": 0.421875, "learning_rate": 0.0019445947284834502, "loss": 0.2671, "step": 11668 }, { "epoch": 0.020691874582771486, "grad_norm": 1.9296875, "learning_rate": 0.0019445741661507788, "loss": 0.6032, "step": 11670 }, { "epoch": 0.020695420748081304, "grad_norm": 0.58203125, "learning_rate": 0.0019445536001244305, "loss": 0.4077, "step": 11672 }, { "epoch": 0.02069896691339112, "grad_norm": 0.58203125, "learning_rate": 0.0019445330304044958, "loss": 0.2392, "step": 11674 }, { "epoch": 0.020702513078700933, "grad_norm": 1.7109375, "learning_rate": 0.0019445124569910637, "loss": 0.4682, "step": 11676 }, { "epoch": 0.020706059244010748, "grad_norm": 0.51953125, "learning_rate": 0.0019444918798842247, "loss": 0.2496, "step": 11678 }, { "epoch": 0.020709605409320563, "grad_norm": 0.484375, "learning_rate": 0.001944471299084069, "loss": 0.5108, "step": 11680 }, { "epoch": 0.020713151574630377, "grad_norm": 0.41015625, "learning_rate": 0.0019444507145906862, "loss": 0.2027, "step": 11682 }, { "epoch": 0.020716697739940195, "grad_norm": 0.57421875, "learning_rate": 0.0019444301264041667, "loss": 0.2185, "step": 11684 }, { "epoch": 0.02072024390525001, "grad_norm": 1.34375, "learning_rate": 0.0019444095345246002, "loss": 0.2453, "step": 11686 }, { "epoch": 0.020723790070559824, "grad_norm": 0.53125, "learning_rate": 0.0019443889389520767, "loss": 0.2386, "step": 11688 }, { "epoch": 0.02072733623586964, "grad_norm": 0.34765625, "learning_rate": 0.0019443683396866867, "loss": 0.2393, "step": 11690 }, { "epoch": 0.020730882401179453, "grad_norm": 0.86328125, "learning_rate": 0.00194434773672852, "loss": 0.2187, "step": 11692 }, { "epoch": 0.02073442856648927, "grad_norm": 0.578125, "learning_rate": 0.0019443271300776666, "loss": 0.2132, "step": 11694 }, { "epoch": 0.020737974731799086, "grad_norm": 0.50390625, "learning_rate": 0.0019443065197342168, "loss": 0.2072, "step": 11696 }, { "epoch": 0.0207415208971089, "grad_norm": 0.69140625, "learning_rate": 0.0019442859056982612, "loss": 0.2375, "step": 11698 }, { "epoch": 0.020745067062418715, "grad_norm": 0.27734375, "learning_rate": 0.001944265287969889, "loss": 0.2856, "step": 11700 }, { "epoch": 0.02074861322772853, "grad_norm": 0.5625, "learning_rate": 0.0019442446665491905, "loss": 0.2713, "step": 11702 }, { "epoch": 0.020752159393038344, "grad_norm": 0.359375, "learning_rate": 0.0019442240414362568, "loss": 0.2755, "step": 11704 }, { "epoch": 0.020755705558348162, "grad_norm": 0.419921875, "learning_rate": 0.0019442034126311773, "loss": 0.2994, "step": 11706 }, { "epoch": 0.020759251723657977, "grad_norm": 0.25, "learning_rate": 0.0019441827801340427, "loss": 0.2344, "step": 11708 }, { "epoch": 0.02076279788896779, "grad_norm": 0.86328125, "learning_rate": 0.001944162143944943, "loss": 0.3913, "step": 11710 }, { "epoch": 0.020766344054277606, "grad_norm": 1.640625, "learning_rate": 0.001944141504063968, "loss": 0.2525, "step": 11712 }, { "epoch": 0.02076989021958742, "grad_norm": 2.828125, "learning_rate": 0.0019441208604912088, "loss": 0.3243, "step": 11714 }, { "epoch": 0.020773436384897235, "grad_norm": 0.33984375, "learning_rate": 0.0019441002132267549, "loss": 0.2193, "step": 11716 }, { "epoch": 0.020776982550207053, "grad_norm": 0.314453125, "learning_rate": 0.0019440795622706975, "loss": 0.2688, "step": 11718 }, { "epoch": 0.020780528715516867, "grad_norm": 0.44921875, "learning_rate": 0.0019440589076231258, "loss": 0.2419, "step": 11720 }, { "epoch": 0.020784074880826682, "grad_norm": 0.314453125, "learning_rate": 0.001944038249284131, "loss": 0.2258, "step": 11722 }, { "epoch": 0.020787621046136497, "grad_norm": 0.953125, "learning_rate": 0.0019440175872538032, "loss": 0.2392, "step": 11724 }, { "epoch": 0.02079116721144631, "grad_norm": 0.55078125, "learning_rate": 0.0019439969215322328, "loss": 0.2375, "step": 11726 }, { "epoch": 0.02079471337675613, "grad_norm": 0.30859375, "learning_rate": 0.00194397625211951, "loss": 0.2131, "step": 11728 }, { "epoch": 0.020798259542065944, "grad_norm": 1.1484375, "learning_rate": 0.0019439555790157254, "loss": 0.2618, "step": 11730 }, { "epoch": 0.020801805707375758, "grad_norm": 0.330078125, "learning_rate": 0.0019439349022209692, "loss": 0.2294, "step": 11732 }, { "epoch": 0.020805351872685573, "grad_norm": 0.48828125, "learning_rate": 0.001943914221735332, "loss": 0.1592, "step": 11734 }, { "epoch": 0.020808898037995387, "grad_norm": 0.42578125, "learning_rate": 0.0019438935375589044, "loss": 0.1544, "step": 11736 }, { "epoch": 0.020812444203305202, "grad_norm": 2.0, "learning_rate": 0.0019438728496917763, "loss": 0.4247, "step": 11738 }, { "epoch": 0.02081599036861502, "grad_norm": 0.359375, "learning_rate": 0.0019438521581340387, "loss": 0.2018, "step": 11740 }, { "epoch": 0.020819536533924834, "grad_norm": 0.36328125, "learning_rate": 0.001943831462885782, "loss": 0.1906, "step": 11742 }, { "epoch": 0.02082308269923465, "grad_norm": 3.078125, "learning_rate": 0.0019438107639470966, "loss": 0.3867, "step": 11744 }, { "epoch": 0.020826628864544464, "grad_norm": 0.59375, "learning_rate": 0.001943790061318073, "loss": 0.2404, "step": 11746 }, { "epoch": 0.020830175029854278, "grad_norm": 0.43359375, "learning_rate": 0.0019437693549988018, "loss": 0.2527, "step": 11748 }, { "epoch": 0.020833721195164093, "grad_norm": 1.546875, "learning_rate": 0.0019437486449893737, "loss": 0.3323, "step": 11750 }, { "epoch": 0.02083726736047391, "grad_norm": 0.248046875, "learning_rate": 0.0019437279312898791, "loss": 0.2413, "step": 11752 }, { "epoch": 0.020840813525783725, "grad_norm": 0.279296875, "learning_rate": 0.0019437072139004087, "loss": 0.2021, "step": 11754 }, { "epoch": 0.02084435969109354, "grad_norm": 0.8359375, "learning_rate": 0.0019436864928210527, "loss": 0.1949, "step": 11756 }, { "epoch": 0.020847905856403354, "grad_norm": 0.515625, "learning_rate": 0.0019436657680519023, "loss": 0.2339, "step": 11758 }, { "epoch": 0.02085145202171317, "grad_norm": 0.46875, "learning_rate": 0.0019436450395930477, "loss": 0.2197, "step": 11760 }, { "epoch": 0.020854998187022987, "grad_norm": 0.27734375, "learning_rate": 0.0019436243074445801, "loss": 0.2461, "step": 11762 }, { "epoch": 0.0208585443523328, "grad_norm": 1.1640625, "learning_rate": 0.0019436035716065897, "loss": 0.3151, "step": 11764 }, { "epoch": 0.020862090517642616, "grad_norm": 1.53125, "learning_rate": 0.0019435828320791668, "loss": 0.3223, "step": 11766 }, { "epoch": 0.02086563668295243, "grad_norm": 3.875, "learning_rate": 0.0019435620888624031, "loss": 0.4664, "step": 11768 }, { "epoch": 0.020869182848262245, "grad_norm": 0.7578125, "learning_rate": 0.0019435413419563888, "loss": 0.3204, "step": 11770 }, { "epoch": 0.02087272901357206, "grad_norm": 0.51171875, "learning_rate": 0.0019435205913612146, "loss": 0.2213, "step": 11772 }, { "epoch": 0.020876275178881878, "grad_norm": 0.423828125, "learning_rate": 0.0019434998370769713, "loss": 0.2593, "step": 11774 }, { "epoch": 0.020879821344191692, "grad_norm": 0.3828125, "learning_rate": 0.0019434790791037495, "loss": 0.2493, "step": 11776 }, { "epoch": 0.020883367509501507, "grad_norm": 0.306640625, "learning_rate": 0.0019434583174416402, "loss": 0.1507, "step": 11778 }, { "epoch": 0.02088691367481132, "grad_norm": 0.341796875, "learning_rate": 0.0019434375520907346, "loss": 0.264, "step": 11780 }, { "epoch": 0.020890459840121136, "grad_norm": 1.203125, "learning_rate": 0.0019434167830511228, "loss": 0.2504, "step": 11782 }, { "epoch": 0.02089400600543095, "grad_norm": 0.435546875, "learning_rate": 0.0019433960103228958, "loss": 0.1792, "step": 11784 }, { "epoch": 0.02089755217074077, "grad_norm": 0.255859375, "learning_rate": 0.0019433752339061442, "loss": 0.2101, "step": 11786 }, { "epoch": 0.020901098336050583, "grad_norm": 1.1640625, "learning_rate": 0.0019433544538009597, "loss": 0.1699, "step": 11788 }, { "epoch": 0.020904644501360398, "grad_norm": 0.59375, "learning_rate": 0.0019433336700074328, "loss": 0.2343, "step": 11790 }, { "epoch": 0.020908190666670212, "grad_norm": 0.52734375, "learning_rate": 0.0019433128825256541, "loss": 0.2101, "step": 11792 }, { "epoch": 0.020911736831980027, "grad_norm": 0.4765625, "learning_rate": 0.0019432920913557148, "loss": 0.2671, "step": 11794 }, { "epoch": 0.020915282997289845, "grad_norm": 1.7890625, "learning_rate": 0.0019432712964977058, "loss": 0.2376, "step": 11796 }, { "epoch": 0.02091882916259966, "grad_norm": 0.302734375, "learning_rate": 0.0019432504979517177, "loss": 0.2463, "step": 11798 }, { "epoch": 0.020922375327909474, "grad_norm": 0.9921875, "learning_rate": 0.001943229695717842, "loss": 0.3419, "step": 11800 }, { "epoch": 0.02092592149321929, "grad_norm": 0.5546875, "learning_rate": 0.0019432088897961693, "loss": 0.1954, "step": 11802 }, { "epoch": 0.020929467658529103, "grad_norm": 0.41796875, "learning_rate": 0.0019431880801867908, "loss": 0.1908, "step": 11804 }, { "epoch": 0.020933013823838918, "grad_norm": 0.359375, "learning_rate": 0.0019431672668897974, "loss": 0.2249, "step": 11806 }, { "epoch": 0.020936559989148736, "grad_norm": 0.71484375, "learning_rate": 0.00194314644990528, "loss": 0.2014, "step": 11808 }, { "epoch": 0.02094010615445855, "grad_norm": 0.76953125, "learning_rate": 0.0019431256292333297, "loss": 0.277, "step": 11810 }, { "epoch": 0.020943652319768365, "grad_norm": 0.388671875, "learning_rate": 0.0019431048048740378, "loss": 0.4032, "step": 11812 }, { "epoch": 0.02094719848507818, "grad_norm": 0.267578125, "learning_rate": 0.0019430839768274954, "loss": 0.2798, "step": 11814 }, { "epoch": 0.020950744650387994, "grad_norm": 2.34375, "learning_rate": 0.001943063145093793, "loss": 0.5086, "step": 11816 }, { "epoch": 0.02095429081569781, "grad_norm": 0.546875, "learning_rate": 0.0019430423096730223, "loss": 0.194, "step": 11818 }, { "epoch": 0.020957836981007626, "grad_norm": 0.359375, "learning_rate": 0.0019430214705652745, "loss": 0.1704, "step": 11820 }, { "epoch": 0.02096138314631744, "grad_norm": 0.5703125, "learning_rate": 0.0019430006277706402, "loss": 0.1989, "step": 11822 }, { "epoch": 0.020964929311627255, "grad_norm": 2.0, "learning_rate": 0.0019429797812892107, "loss": 0.2876, "step": 11824 }, { "epoch": 0.02096847547693707, "grad_norm": 0.3515625, "learning_rate": 0.0019429589311210776, "loss": 0.2928, "step": 11826 }, { "epoch": 0.020972021642246885, "grad_norm": 0.92578125, "learning_rate": 0.0019429380772663317, "loss": 0.2403, "step": 11828 }, { "epoch": 0.020975567807556703, "grad_norm": 1.328125, "learning_rate": 0.0019429172197250645, "loss": 0.2353, "step": 11830 }, { "epoch": 0.020979113972866517, "grad_norm": 0.70703125, "learning_rate": 0.0019428963584973665, "loss": 0.1783, "step": 11832 }, { "epoch": 0.02098266013817633, "grad_norm": 1.1328125, "learning_rate": 0.0019428754935833297, "loss": 0.2238, "step": 11834 }, { "epoch": 0.020986206303486146, "grad_norm": 0.5390625, "learning_rate": 0.001942854624983045, "loss": 0.2795, "step": 11836 }, { "epoch": 0.02098975246879596, "grad_norm": 0.80859375, "learning_rate": 0.0019428337526966038, "loss": 0.252, "step": 11838 }, { "epoch": 0.020993298634105775, "grad_norm": 1.0625, "learning_rate": 0.0019428128767240973, "loss": 0.2096, "step": 11840 }, { "epoch": 0.020996844799415593, "grad_norm": 0.65234375, "learning_rate": 0.0019427919970656168, "loss": 0.1612, "step": 11842 }, { "epoch": 0.021000390964725408, "grad_norm": 0.52734375, "learning_rate": 0.001942771113721254, "loss": 0.3048, "step": 11844 }, { "epoch": 0.021003937130035222, "grad_norm": 0.29296875, "learning_rate": 0.0019427502266910997, "loss": 0.239, "step": 11846 }, { "epoch": 0.021007483295345037, "grad_norm": 0.3671875, "learning_rate": 0.0019427293359752453, "loss": 0.2246, "step": 11848 }, { "epoch": 0.02101102946065485, "grad_norm": 0.625, "learning_rate": 0.0019427084415737826, "loss": 0.1583, "step": 11850 }, { "epoch": 0.021014575625964666, "grad_norm": 0.470703125, "learning_rate": 0.0019426875434868022, "loss": 0.2764, "step": 11852 }, { "epoch": 0.021018121791274484, "grad_norm": 0.451171875, "learning_rate": 0.0019426666417143965, "loss": 0.1836, "step": 11854 }, { "epoch": 0.0210216679565843, "grad_norm": 0.78515625, "learning_rate": 0.0019426457362566561, "loss": 0.2434, "step": 11856 }, { "epoch": 0.021025214121894113, "grad_norm": 0.380859375, "learning_rate": 0.001942624827113673, "loss": 0.2406, "step": 11858 }, { "epoch": 0.021028760287203928, "grad_norm": 0.353515625, "learning_rate": 0.001942603914285538, "loss": 0.2339, "step": 11860 }, { "epoch": 0.021032306452513742, "grad_norm": 1.1875, "learning_rate": 0.0019425829977723428, "loss": 0.2634, "step": 11862 }, { "epoch": 0.02103585261782356, "grad_norm": 0.318359375, "learning_rate": 0.0019425620775741792, "loss": 0.1601, "step": 11864 }, { "epoch": 0.021039398783133375, "grad_norm": 0.38671875, "learning_rate": 0.001942541153691139, "loss": 0.2079, "step": 11866 }, { "epoch": 0.02104294494844319, "grad_norm": 0.53125, "learning_rate": 0.0019425202261233124, "loss": 0.2634, "step": 11868 }, { "epoch": 0.021046491113753004, "grad_norm": 1.4296875, "learning_rate": 0.001942499294870792, "loss": 0.2744, "step": 11870 }, { "epoch": 0.02105003727906282, "grad_norm": 0.40625, "learning_rate": 0.0019424783599336693, "loss": 0.2494, "step": 11872 }, { "epoch": 0.021053583444372633, "grad_norm": 0.380859375, "learning_rate": 0.0019424574213120355, "loss": 0.2023, "step": 11874 }, { "epoch": 0.02105712960968245, "grad_norm": 0.37890625, "learning_rate": 0.001942436479005982, "loss": 0.2424, "step": 11876 }, { "epoch": 0.021060675774992266, "grad_norm": 0.95703125, "learning_rate": 0.0019424155330156011, "loss": 0.2661, "step": 11878 }, { "epoch": 0.02106422194030208, "grad_norm": 2.546875, "learning_rate": 0.0019423945833409839, "loss": 0.4265, "step": 11880 }, { "epoch": 0.021067768105611895, "grad_norm": 0.310546875, "learning_rate": 0.0019423736299822218, "loss": 0.149, "step": 11882 }, { "epoch": 0.02107131427092171, "grad_norm": 0.609375, "learning_rate": 0.001942352672939407, "loss": 0.1588, "step": 11884 }, { "epoch": 0.021074860436231524, "grad_norm": 0.53125, "learning_rate": 0.001942331712212631, "loss": 0.205, "step": 11886 }, { "epoch": 0.021078406601541342, "grad_norm": 0.46484375, "learning_rate": 0.0019423107478019853, "loss": 0.1862, "step": 11888 }, { "epoch": 0.021081952766851157, "grad_norm": 0.66015625, "learning_rate": 0.0019422897797075616, "loss": 0.2353, "step": 11890 }, { "epoch": 0.02108549893216097, "grad_norm": 0.37109375, "learning_rate": 0.0019422688079294517, "loss": 0.1906, "step": 11892 }, { "epoch": 0.021089045097470786, "grad_norm": 1.3203125, "learning_rate": 0.0019422478324677473, "loss": 0.2766, "step": 11894 }, { "epoch": 0.0210925912627806, "grad_norm": 0.375, "learning_rate": 0.00194222685332254, "loss": 0.3606, "step": 11896 }, { "epoch": 0.021096137428090418, "grad_norm": 1.1796875, "learning_rate": 0.0019422058704939218, "loss": 0.4156, "step": 11898 }, { "epoch": 0.021099683593400233, "grad_norm": 0.7890625, "learning_rate": 0.0019421848839819844, "loss": 0.17, "step": 11900 }, { "epoch": 0.021103229758710047, "grad_norm": 0.37109375, "learning_rate": 0.0019421638937868193, "loss": 0.2583, "step": 11902 }, { "epoch": 0.021106775924019862, "grad_norm": 1.9140625, "learning_rate": 0.0019421428999085188, "loss": 0.438, "step": 11904 }, { "epoch": 0.021110322089329676, "grad_norm": 1.71875, "learning_rate": 0.0019421219023471742, "loss": 0.3924, "step": 11906 }, { "epoch": 0.02111386825463949, "grad_norm": 1.1015625, "learning_rate": 0.0019421009011028776, "loss": 0.2796, "step": 11908 }, { "epoch": 0.02111741441994931, "grad_norm": 1.1328125, "learning_rate": 0.0019420798961757206, "loss": 0.2339, "step": 11910 }, { "epoch": 0.021120960585259124, "grad_norm": 1.90625, "learning_rate": 0.0019420588875657958, "loss": 0.2266, "step": 11912 }, { "epoch": 0.021124506750568938, "grad_norm": 0.81640625, "learning_rate": 0.0019420378752731942, "loss": 0.2492, "step": 11914 }, { "epoch": 0.021128052915878753, "grad_norm": 2.21875, "learning_rate": 0.0019420168592980082, "loss": 0.4295, "step": 11916 }, { "epoch": 0.021131599081188567, "grad_norm": 1.5859375, "learning_rate": 0.0019419958396403294, "loss": 0.3756, "step": 11918 }, { "epoch": 0.021135145246498382, "grad_norm": 2.25, "learning_rate": 0.0019419748163002498, "loss": 0.2401, "step": 11920 }, { "epoch": 0.0211386914118082, "grad_norm": 0.703125, "learning_rate": 0.0019419537892778618, "loss": 0.2321, "step": 11922 }, { "epoch": 0.021142237577118014, "grad_norm": 0.333984375, "learning_rate": 0.0019419327585732565, "loss": 0.2658, "step": 11924 }, { "epoch": 0.02114578374242783, "grad_norm": 1.203125, "learning_rate": 0.0019419117241865267, "loss": 0.5251, "step": 11926 }, { "epoch": 0.021149329907737643, "grad_norm": 1.8828125, "learning_rate": 0.001941890686117764, "loss": 0.3023, "step": 11928 }, { "epoch": 0.021152876073047458, "grad_norm": 0.5546875, "learning_rate": 0.0019418696443670605, "loss": 0.2206, "step": 11930 }, { "epoch": 0.021156422238357276, "grad_norm": 0.3359375, "learning_rate": 0.001941848598934508, "loss": 0.2282, "step": 11932 }, { "epoch": 0.02115996840366709, "grad_norm": 0.306640625, "learning_rate": 0.0019418275498201988, "loss": 0.2521, "step": 11934 }, { "epoch": 0.021163514568976905, "grad_norm": 0.53125, "learning_rate": 0.001941806497024225, "loss": 0.2321, "step": 11936 }, { "epoch": 0.02116706073428672, "grad_norm": 0.359375, "learning_rate": 0.0019417854405466787, "loss": 0.248, "step": 11938 }, { "epoch": 0.021170606899596534, "grad_norm": 0.7109375, "learning_rate": 0.0019417643803876516, "loss": 0.2089, "step": 11940 }, { "epoch": 0.02117415306490635, "grad_norm": 0.51953125, "learning_rate": 0.001941743316547236, "loss": 0.1851, "step": 11942 }, { "epoch": 0.021177699230216167, "grad_norm": 0.37890625, "learning_rate": 0.0019417222490255247, "loss": 0.205, "step": 11944 }, { "epoch": 0.02118124539552598, "grad_norm": 0.357421875, "learning_rate": 0.0019417011778226083, "loss": 0.2196, "step": 11946 }, { "epoch": 0.021184791560835796, "grad_norm": 0.6796875, "learning_rate": 0.0019416801029385805, "loss": 0.2138, "step": 11948 }, { "epoch": 0.02118833772614561, "grad_norm": 0.328125, "learning_rate": 0.0019416590243735328, "loss": 0.2103, "step": 11950 }, { "epoch": 0.021191883891455425, "grad_norm": 0.6640625, "learning_rate": 0.001941637942127557, "loss": 0.1775, "step": 11952 }, { "epoch": 0.02119543005676524, "grad_norm": 1.0546875, "learning_rate": 0.001941616856200746, "loss": 0.3328, "step": 11954 }, { "epoch": 0.021198976222075058, "grad_norm": 0.408203125, "learning_rate": 0.0019415957665931917, "loss": 0.2312, "step": 11956 }, { "epoch": 0.021202522387384872, "grad_norm": 0.26953125, "learning_rate": 0.0019415746733049864, "loss": 0.1949, "step": 11958 }, { "epoch": 0.021206068552694687, "grad_norm": 0.57421875, "learning_rate": 0.0019415535763362224, "loss": 0.2523, "step": 11960 }, { "epoch": 0.0212096147180045, "grad_norm": 0.275390625, "learning_rate": 0.0019415324756869917, "loss": 0.2221, "step": 11962 }, { "epoch": 0.021213160883314316, "grad_norm": 0.66796875, "learning_rate": 0.001941511371357387, "loss": 0.2071, "step": 11964 }, { "epoch": 0.021216707048624134, "grad_norm": 0.275390625, "learning_rate": 0.0019414902633475002, "loss": 0.1605, "step": 11966 }, { "epoch": 0.02122025321393395, "grad_norm": 0.447265625, "learning_rate": 0.0019414691516574237, "loss": 0.2008, "step": 11968 }, { "epoch": 0.021223799379243763, "grad_norm": 0.41796875, "learning_rate": 0.0019414480362872502, "loss": 0.1891, "step": 11970 }, { "epoch": 0.021227345544553577, "grad_norm": 0.490234375, "learning_rate": 0.0019414269172370715, "loss": 0.2498, "step": 11972 }, { "epoch": 0.021230891709863392, "grad_norm": 0.42578125, "learning_rate": 0.0019414057945069804, "loss": 0.2774, "step": 11974 }, { "epoch": 0.021234437875173207, "grad_norm": 0.265625, "learning_rate": 0.001941384668097069, "loss": 0.1612, "step": 11976 }, { "epoch": 0.021237984040483025, "grad_norm": 0.9921875, "learning_rate": 0.0019413635380074296, "loss": 0.2101, "step": 11978 }, { "epoch": 0.02124153020579284, "grad_norm": 0.69140625, "learning_rate": 0.001941342404238155, "loss": 0.3062, "step": 11980 }, { "epoch": 0.021245076371102654, "grad_norm": 0.50390625, "learning_rate": 0.0019413212667893376, "loss": 0.2146, "step": 11982 }, { "epoch": 0.021248622536412468, "grad_norm": 0.4921875, "learning_rate": 0.0019413001256610696, "loss": 0.1958, "step": 11984 }, { "epoch": 0.021252168701722283, "grad_norm": 3.40625, "learning_rate": 0.0019412789808534434, "loss": 0.1789, "step": 11986 }, { "epoch": 0.021255714867032097, "grad_norm": 0.703125, "learning_rate": 0.0019412578323665518, "loss": 0.241, "step": 11988 }, { "epoch": 0.021259261032341915, "grad_norm": 0.466796875, "learning_rate": 0.0019412366802004871, "loss": 0.2686, "step": 11990 }, { "epoch": 0.02126280719765173, "grad_norm": 0.734375, "learning_rate": 0.0019412155243553415, "loss": 0.2405, "step": 11992 }, { "epoch": 0.021266353362961544, "grad_norm": 0.5, "learning_rate": 0.001941194364831208, "loss": 0.2495, "step": 11994 }, { "epoch": 0.02126989952827136, "grad_norm": 0.1669921875, "learning_rate": 0.001941173201628179, "loss": 0.1895, "step": 11996 }, { "epoch": 0.021273445693581174, "grad_norm": 0.361328125, "learning_rate": 0.001941152034746347, "loss": 0.1647, "step": 11998 }, { "epoch": 0.02127699185889099, "grad_norm": 0.68359375, "learning_rate": 0.0019411308641858046, "loss": 0.2204, "step": 12000 } ], "logging_steps": 2, "max_steps": 96010, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.944256645894636e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }